2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "formats-util.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
/* Once this many per-user journal files are open, find_journal() closes one before opening another */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults; presumably applied where the server configuration is initialized (not visible here) */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures are considered fresh, see cache_space_refresh() */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
91 _cleanup_closedir_
DIR *d
= NULL
;
100 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
101 errno
, "Failed to open %s: %m", path
);
103 if (fstatvfs(dirfd(d
), &ss
) < 0)
104 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
106 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
108 FOREACH_DIRENT_ALL(de
, d
, break) {
111 if (!endswith(de
->d_name
, ".journal") &&
112 !endswith(de
->d_name
, ".journal~"))
115 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
116 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
120 if (!S_ISREG(st
.st_mode
))
123 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
129 static void cache_space_invalidate(JournalStorageSpace
*space
) {
130 memset(space
, 0, sizeof(*space
));
133 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
135 _cleanup_closedir_
DIR *d
= NULL
;
136 JournalStorageSpace
*space
;
137 JournalMetrics
*metrics
;
138 uint64_t vfs_used
, vfs_avail
, avail
;
144 metrics
= &storage
->metrics
;
145 space
= &storage
->space
;
147 ts
= now(CLOCK_MONOTONIC
);
149 if (space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
152 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
156 space
->vfs_used
= vfs_used
;
157 space
->vfs_available
= vfs_avail
;
159 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
161 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
162 space
->available
= LESS_BY(space
->limit
, vfs_used
);
163 space
->timestamp
= ts
;
167 static void patch_min_use(JournalStorage
*storage
) {
170 /* Let's bump the min_use limit to the current usage on disk. We do
171 * this when starting up and first opening the journal files. This way
172 * sudden spikes in disk usage will not cause journald to vacuum files
173 * without bounds. Note that this means that only a restart of journald
174 * will make it reset this value. */
176 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
180 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
186 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
188 r
= cache_space_refresh(s
, js
);
191 *available
= js
->space
.available
;
193 *limit
= js
->space
.limit
;
198 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
199 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
200 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
201 JournalMetrics
*metrics
;
206 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
208 if (cache_space_refresh(s
, storage
) < 0)
211 metrics
= &storage
->metrics
;
212 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
213 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
214 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
215 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
216 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
217 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
219 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
220 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
221 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
222 "JOURNAL_NAME=%s", storage
->name
,
223 "JOURNAL_PATH=%s", storage
->path
,
224 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
225 "CURRENT_USE_PRETTY=%s", fb1
,
226 "MAX_USE=%"PRIu64
, metrics
->max_use
,
227 "MAX_USE_PRETTY=%s", fb2
,
228 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
229 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
230 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
231 "DISK_AVAILABLE_PRETTY=%s", fb4
,
232 "LIMIT=%"PRIu64
, storage
->space
.limit
,
233 "LIMIT_PRETTY=%s", fb5
,
234 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
235 "AVAILABLE_PRETTY=%s", fb6
,
239 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
246 if (uid
<= SYSTEM_UID_MAX
)
249 r
= add_acls_for_user(f
->fd
, uid
);
251 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
255 static int open_journal(
261 JournalMetrics
*metrics
,
271 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
273 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
277 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
279 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        return access("/run/systemd/journal/flushed", F_OK) >= 0;
}
291 static int system_journal_open(Server
*s
, bool flush_requested
) {
292 bool flushed
= false;
296 if (!s
->system_journal
&&
297 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
298 (flush_requested
|| (flushed
= flushed_flag_is_set()))) {
300 /* If in auto mode: first try to create the machine
301 * path, but not the prefix.
303 * If in persistent mode: create /var/log/journal and
304 * the machine path */
306 if (s
->storage
== STORAGE_PERSISTENT
)
307 (void) mkdir_p("/var/log/journal/", 0755);
309 (void) mkdir(s
->system_storage
.path
, 0755);
311 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
312 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
314 server_add_acls(s
->system_journal
, 0);
315 (void) cache_space_refresh(s
, &s
->system_storage
);
316 patch_min_use(&s
->system_storage
);
318 if (r
!= -ENOENT
&& r
!= -EROFS
)
319 log_warning_errno(r
, "Failed to open system journal: %m");
324 /* If the runtime journal is open, and we're post-flush, we're
325 * recovering from a failed system journal rotate (ENOSPC)
326 * for which the runtime journal was reopened.
328 * Perform an implicit flush to var, leaving the runtime
329 * journal closed, now that the system journal is back.
331 if (s
->runtime_journal
&& flushed
)
332 (void) server_flush_to_var(s
);
335 if (!s
->runtime_journal
&&
336 (s
->storage
!= STORAGE_NONE
)) {
338 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
340 if (s
->system_journal
) {
342 /* Try to open the runtime journal, but only
343 * if it already exists, so that we can flush
344 * it into the system journal */
346 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
349 log_warning_errno(r
, "Failed to open runtime journal: %m");
356 /* OK, we really need the runtime journal, so create
357 * it if necessary. */
359 (void) mkdir("/run/log", 0755);
360 (void) mkdir("/run/log/journal", 0755);
361 (void) mkdir_parents(fn
, 0750);
363 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
365 return log_error_errno(r
, "Failed to open runtime journal: %m");
368 if (s
->runtime_journal
) {
369 server_add_acls(s
->runtime_journal
, 0);
370 (void) cache_space_refresh(s
, &s
->runtime_storage
);
371 patch_min_use(&s
->runtime_storage
);
378 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
379 _cleanup_free_
char *p
= NULL
;
386 /* A rotate that fails to create the new journal (ENOSPC) leaves the
387 * rotated journal as NULL. Unless we revisit opening, even after
388 * space is made available we'll continue to return NULL indefinitely.
390 * system_journal_open() is a noop if the journals are already open, so
391 * we can just call it here to recover from failed rotates (or anything
392 * else that's left the journals as NULL).
394 * Fixes https://github.com/systemd/systemd/issues/3968 */
395 (void) system_journal_open(s
, false);
397 /* We split up user logs only on /var, not on /run. If the
398 * runtime file is open, we write to it exclusively, in order
399 * to guarantee proper order as soon as we flush /run to
400 * /var and close the runtime file. */
402 if (s
->runtime_journal
)
403 return s
->runtime_journal
;
405 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
406 return s
->system_journal
;
408 r
= sd_id128_get_machine(&machine
);
410 return s
->system_journal
;
412 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
416 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
417 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
418 return s
->system_journal
;
420 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
421 /* Too many open? Then let's close one */
422 f
= ordered_hashmap_steal_first(s
->user_journals
);
424 (void) journal_file_close(f
);
427 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
429 return s
->system_journal
;
431 server_add_acls(f
, uid
);
433 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
435 (void) journal_file_close(f
);
436 return s
->system_journal
;
442 static int do_rotate(
455 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
458 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
460 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
462 server_add_acls(*f
, uid
);
467 void server_rotate(Server
*s
) {
473 log_debug("Rotating...");
475 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
476 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
478 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
479 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
481 ordered_hashmap_replace(s
->user_journals
, k
, f
);
483 /* Old file has been closed and deallocated */
484 ordered_hashmap_remove(s
->user_journals
, k
);
487 /* Perform any deferred closes which aren't still offlining. */
488 SET_FOREACH(f
, s
->deferred_closes
, i
)
489 if (!journal_file_is_offlining(f
)) {
490 (void) set_remove(s
->deferred_closes
, f
);
491 (void) journal_file_close(f
);
495 void server_sync(Server
*s
) {
500 if (s
->system_journal
) {
501 r
= journal_file_set_offline(s
->system_journal
, false);
503 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
506 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
507 r
= journal_file_set_offline(f
, false);
509 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
512 if (s
->sync_event_source
) {
513 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
515 log_error_errno(r
, "Failed to disable sync timer source: %m");
518 s
->sync_scheduled
= false;
521 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
528 (void) cache_space_refresh(s
, storage
);
531 server_space_usage_message(s
, storage
);
533 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
534 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
535 &s
->oldest_file_usec
, verbose
);
536 if (r
< 0 && r
!= -ENOENT
)
537 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
539 cache_space_invalidate(&storage
->space
);
542 int server_vacuum(Server
*s
, bool verbose
) {
545 log_debug("Vacuuming...");
547 s
->oldest_file_usec
= 0;
549 if (s
->system_journal
)
550 do_vacuum(s
, &s
->system_storage
, verbose
);
551 if (s
->runtime_journal
)
552 do_vacuum(s
, &s
->runtime_storage
, verbose
);
557 static void server_cache_machine_id(Server
*s
) {
563 r
= sd_id128_get_machine(&id
);
567 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
570 static void server_cache_boot_id(Server
*s
) {
576 r
= sd_id128_get_boot(&id
);
580 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
583 static void server_cache_hostname(Server
*s
) {
584 _cleanup_free_
char *t
= NULL
;
589 t
= gethostname_malloc();
593 x
= strappend("_HOSTNAME=", t
);
597 free(s
->hostname_field
);
598 s
->hostname_field
= x
;
601 static bool shall_try_append_again(JournalFile
*f
, int r
) {
604 case -E2BIG
: /* Hit configured limit */
605 case -EFBIG
: /* Hit fs limit */
606 case -EDQUOT
: /* Quota limit hit */
607 case -ENOSPC
: /* Disk full */
608 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
611 case -EIO
: /* I/O error of some kind (mmap) */
612 log_warning("%s: IO error, rotating.", f
->path
);
615 case -EHOSTDOWN
: /* Other machine */
616 log_info("%s: Journal file from other machine, rotating.", f
->path
);
619 case -EBUSY
: /* Unclean shutdown */
620 log_info("%s: Unclean shutdown, rotating.", f
->path
);
623 case -EPROTONOSUPPORT
: /* Unsupported feature */
624 log_info("%s: Unsupported feature, rotating.", f
->path
);
627 case -EBADMSG
: /* Corrupted */
628 case -ENODATA
: /* Truncated */
629 case -ESHUTDOWN
: /* Already archived */
630 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
633 case -EIDRM
: /* Journal file has been deleted */
634 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
637 case -ETXTBSY
: /* Journal file is from the future */
638 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
646 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
647 bool vacuumed
= false, rotate
= false;
648 struct dual_timestamp ts
;
656 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
657 * the source time, and not even the time the event was originally seen, but instead simply the time we started
658 * processing it, as we want strictly linear ordering in what we write out.) */
659 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
660 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
662 if (ts
.realtime
< s
->last_realtime_clock
) {
663 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
664 * regular operation. However, when it does happen, then we should make sure that we start fresh files
665 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
666 * bisection works correctly. */
668 log_debug("Time jumped backwards, rotating.");
672 f
= find_journal(s
, uid
);
676 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
677 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
684 server_vacuum(s
, false);
687 f
= find_journal(s
, uid
);
692 s
->last_realtime_clock
= ts
.realtime
;
694 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
696 server_schedule_sync(s
, priority
);
700 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
701 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
706 server_vacuum(s
, false);
708 f
= find_journal(s
, uid
);
712 log_debug("Retrying write.");
713 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
715 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
717 server_schedule_sync(s
, priority
);
720 static int get_invocation_id(const char *cgroup_root
, const char *slice
, const char *unit
, char **ret
) {
721 _cleanup_free_
char *escaped
= NULL
, *slice_path
= NULL
, *p
= NULL
;
722 char *copy
, ids
[SD_ID128_STRING_MAX
];
725 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
726 * on the cgroup path. */
728 r
= cg_slice_to_path(slice
, &slice_path
);
732 escaped
= cg_escape(unit
);
736 p
= strjoin(cgroup_root
, "/", slice_path
, "/", escaped
, NULL
);
740 r
= cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER
, p
, "trusted.invocation_id", ids
, 32);
747 if (!id128_is_valid(ids
))
758 static void dispatch_message_real(
760 struct iovec
*iovec
, unsigned n
, unsigned m
,
761 const struct ucred
*ucred
,
762 const struct timeval
*tv
,
763 const char *label
, size_t label_len
,
768 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
769 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
770 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
771 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
772 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
773 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
774 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
775 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
781 uid_t realuid
= 0, owner
= 0, journal_uid
;
782 bool owner_valid
= false;
784 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
786 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
787 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
796 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
> 0 ? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
799 realuid
= ucred
->uid
;
801 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
802 IOVEC_SET_STRING(iovec
[n
++], pid
);
804 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
805 IOVEC_SET_STRING(iovec
[n
++], uid
);
807 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
808 IOVEC_SET_STRING(iovec
[n
++], gid
);
810 r
= get_process_comm(ucred
->pid
, &t
);
812 x
= strjoina("_COMM=", t
);
814 IOVEC_SET_STRING(iovec
[n
++], x
);
817 r
= get_process_exe(ucred
->pid
, &t
);
819 x
= strjoina("_EXE=", t
);
821 IOVEC_SET_STRING(iovec
[n
++], x
);
824 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
826 x
= strjoina("_CMDLINE=", t
);
828 IOVEC_SET_STRING(iovec
[n
++], x
);
831 r
= get_process_capeff(ucred
->pid
, &t
);
833 x
= strjoina("_CAP_EFFECTIVE=", t
);
835 IOVEC_SET_STRING(iovec
[n
++], x
);
839 r
= audit_session_from_pid(ucred
->pid
, &audit
);
841 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
842 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
845 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
847 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
848 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
852 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
854 _cleanup_free_
char *raw_unit
= NULL
, *raw_slice
= NULL
;
855 char *session
= NULL
;
857 x
= strjoina("_SYSTEMD_CGROUP=", c
);
858 IOVEC_SET_STRING(iovec
[n
++], x
);
860 r
= cg_path_get_session(c
, &t
);
862 session
= strjoina("_SYSTEMD_SESSION=", t
);
864 IOVEC_SET_STRING(iovec
[n
++], session
);
867 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
870 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
871 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
874 if (cg_path_get_unit(c
, &raw_unit
) >= 0) {
875 x
= strjoina("_SYSTEMD_UNIT=", raw_unit
);
876 IOVEC_SET_STRING(iovec
[n
++], x
);
877 } else if (unit_id
&& !session
) {
878 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
879 IOVEC_SET_STRING(iovec
[n
++], x
);
882 if (cg_path_get_user_unit(c
, &t
) >= 0) {
883 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
885 IOVEC_SET_STRING(iovec
[n
++], x
);
886 } else if (unit_id
&& session
) {
887 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
888 IOVEC_SET_STRING(iovec
[n
++], x
);
891 if (cg_path_get_slice(c
, &raw_slice
) >= 0) {
892 x
= strjoina("_SYSTEMD_SLICE=", raw_slice
);
893 IOVEC_SET_STRING(iovec
[n
++], x
);
896 if (cg_path_get_user_slice(c
, &t
) >= 0) {
897 x
= strjoina("_SYSTEMD_USER_SLICE=", t
);
899 IOVEC_SET_STRING(iovec
[n
++], x
);
902 if (raw_slice
&& raw_unit
) {
903 if (get_invocation_id(s
->cgroup_root
, raw_slice
, raw_unit
, &t
) >= 0) {
904 x
= strjoina("_SYSTEMD_INVOCATION_ID=", t
);
906 IOVEC_SET_STRING(iovec
[n
++], x
);
911 } else if (unit_id
) {
912 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
913 IOVEC_SET_STRING(iovec
[n
++], x
);
917 if (mac_selinux_have()) {
919 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
921 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
922 IOVEC_SET_STRING(iovec
[n
++], x
);
926 if (getpidcon(ucred
->pid
, &con
) >= 0) {
927 x
= strjoina("_SELINUX_CONTEXT=", con
);
930 IOVEC_SET_STRING(iovec
[n
++], x
);
939 r
= get_process_uid(object_pid
, &object_uid
);
941 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
942 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
945 r
= get_process_gid(object_pid
, &object_gid
);
947 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
948 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
951 r
= get_process_comm(object_pid
, &t
);
953 x
= strjoina("OBJECT_COMM=", t
);
955 IOVEC_SET_STRING(iovec
[n
++], x
);
958 r
= get_process_exe(object_pid
, &t
);
960 x
= strjoina("OBJECT_EXE=", t
);
962 IOVEC_SET_STRING(iovec
[n
++], x
);
965 r
= get_process_cmdline(object_pid
, 0, false, &t
);
967 x
= strjoina("OBJECT_CMDLINE=", t
);
969 IOVEC_SET_STRING(iovec
[n
++], x
);
973 r
= audit_session_from_pid(object_pid
, &audit
);
975 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
976 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
979 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
981 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
982 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
986 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
988 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
989 IOVEC_SET_STRING(iovec
[n
++], x
);
991 r
= cg_path_get_session(c
, &t
);
993 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
995 IOVEC_SET_STRING(iovec
[n
++], x
);
998 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
999 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
1000 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
1003 if (cg_path_get_unit(c
, &t
) >= 0) {
1004 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
1006 IOVEC_SET_STRING(iovec
[n
++], x
);
1009 if (cg_path_get_user_unit(c
, &t
) >= 0) {
1010 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
1012 IOVEC_SET_STRING(iovec
[n
++], x
);
1015 if (cg_path_get_slice(c
, &t
) >= 0) {
1016 x
= strjoina("OBJECT_SYSTEMD_SLICE=", t
);
1018 IOVEC_SET_STRING(iovec
[n
++], x
);
1021 if (cg_path_get_user_slice(c
, &t
) >= 0) {
1022 x
= strjoina("OBJECT_SYSTEMD_USER_SLICE=", t
);
1024 IOVEC_SET_STRING(iovec
[n
++], x
);
1033 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
1034 IOVEC_SET_STRING(iovec
[n
++], source_time
);
1037 /* Note that strictly speaking storing the boot id here is
1038 * redundant since the entry includes this in-line
1039 * anyway. However, we need this indexed, too. */
1040 if (!isempty(s
->boot_id_field
))
1041 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
1043 if (!isempty(s
->machine_id_field
))
1044 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
1046 if (!isempty(s
->hostname_field
))
1047 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
1051 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
1052 /* Split up strictly by any UID */
1053 journal_uid
= realuid
;
1054 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
1055 /* Split up by login UIDs. We do this only if the
1056 * realuid is not root, in order not to accidentally
1057 * leak privileged information to the user that is
1058 * logged by a privileged process that is part of an
1059 * unprivileged session. */
1060 journal_uid
= owner
;
1064 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1067 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
1068 char mid
[11 + 32 + 1];
1069 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
1073 struct ucred ucred
= {};
1078 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1079 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
1080 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
1082 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
1083 assert_cc(6 == LOG_INFO
);
1084 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
1086 if (!sd_id128_is_null(message_id
)) {
1087 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
1088 IOVEC_SET_STRING(iovec
[n
++], mid
);
1093 va_start(ap
, format
);
1094 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
1095 /* Error handling below */
1098 ucred
.pid
= getpid();
1099 ucred
.uid
= getuid();
1100 ucred
.gid
= getgid();
1103 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
1106 free(iovec
[m
++].iov_base
);
1109 /* We failed to format the message. Emit a warning instead. */
1112 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1115 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec
[n
++], buf
);
1117 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
/* Entry point for a received log entry. Compares the entry priority
 * with s->max_level_store and the storage mode with STORAGE_NONE, looks
 * up the cgroup path of the sending process (ucred->pid) to use as the
 * rate limit key, asks journal_rate_limit_test() for a verdict and
 * finally hands the entry to dispatch_message_real(). The statements
 * guarded by the individual checks are on lines not visible here.
 *
 * iovec/n: the entry fields; iovec may be NULL only when n is 0.
 * m:       passed through unchanged to dispatch_message_real().
 * ucred, tv, label, label_len, unit_id: sender metadata, passed through. */
1121 void server_dispatch_message(
1123 struct iovec
*iovec
, unsigned n
, unsigned m
,
1124 const struct ucred
*ucred
,
1125 const struct timeval
*tv
,
1126 const char *label
, size_t label_len
,
1127 const char *unit_id
,
1132 _cleanup_free_
char *path
= NULL
;
1133 uint64_t available
= 0;
1137 assert(iovec
|| n
== 0);
1142 if (LOG_PRI(priority
) > s
->max_level_store
)
1145 /* Stop early in case the information will not be stored
1147 if (s
->storage
== STORAGE_NONE
)
1153 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1160 * So let's cut of everything past the third /, since that is
1161 * where user directories start */
1163 c
= strchr(path
, '/');
1165 c
= strchr(c
+1, '/');
1167 c
= strchr(c
+1, '/');
/* The result of determine_space() is deliberately ignored (void cast);
 * available was initialised to 0 above. */
1173 (void) determine_space(s
, &available
, NULL
);
1174 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
1178 /* Write a suppression message if we suppressed something */
/* presumably rl also counts the current entry, hence rl - 1 in the
 * text; confirm against journal_rate_limit_test() */
1180 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
1185 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
/* Copies the entries of the runtime journal into the system journal.
 * The storage mode (STORAGE_AUTO / STORAGE_PERSISTENT), the presence of
 * a runtime journal and the availability of a system journal are
 * checked up front. Entries are read through an sd_journal handle
 * opened with SD_JOURNAL_RUNTIME_ONLY and copied one by one with
 * journal_file_copy_entry(). Afterwards the runtime journal is closed,
 * /run/log/journal is removed and a driver message reporting the time
 * spent is emitted. Failure to open the reader is returned via
 * log_error_errno(); other return paths are not visible here. */
1188 int server_flush_to_var(Server
*s
) {
1190 sd_journal
*j
= NULL
;
1191 char ts
[FORMAT_TIMESPAN_MAX
];
1198 if (s
->storage
!= STORAGE_AUTO
&&
1199 s
->storage
!= STORAGE_PERSISTENT
)
1202 if (!s
->runtime_journal
)
1205 (void) system_journal_open(s
, true);
1207 if (!s
->system_journal
)
1210 log_debug("Flushing to /var...");
/* start is the reference point for the duration reported at the end */
1212 start
= now(CLOCK_MONOTONIC
);
1214 r
= sd_id128_get_machine(&machine
);
1218 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1220 return log_error_errno(r
, "Failed to read runtime journal: %m");
/* presumably a threshold of 0 lifts the size limit on returned field
 * data; see sd_journal_set_data_threshold(3) */
1222 sd_journal_set_data_threshold(j
, 0);
1224 SD_JOURNAL_FOREACH(j
) {
/* The entry object is located directly in the file the reader is
 * currently positioned in. */
1228 f
= j
->current_file
;
1229 assert(f
&& f
->current_offset
> 0);
1233 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1235 log_error_errno(r
, "Can't read entry: %m");
1239 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1243 if (!shall_try_append_again(s
->system_journal
, r
)) {
1244 log_error_errno(r
, "Can't write entry: %m");
/* Retry path: vacuum, verify that a system journal is still open, then
 * attempt the copy a second time. */
1249 server_vacuum(s
, false);
1251 if (!s
->system_journal
) {
1252 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1257 log_debug("Retrying write.");
1258 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1260 log_error_errno(r
, "Can't write entry: %m");
1268 journal_file_post_change(s
->system_journal
);
/* The pointer is overwritten with the return value of
 * journal_file_close(). */
1270 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1273 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1275 sd_journal_close(j
);
1277 server_driver_message(s
, SD_ID128_NULL
,
1278 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1279 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* Event loop I/O callback shared by the native, syslog and audit
 * datagram sockets (fd must be one of the three, see the assertion).
 * Receives a single datagram with recvmsg() into s->buffer, picks the
 * sender credentials (SCM_CREDENTIALS), the security label
 * (SCM_SECURITY), the receive timestamp (matched against SO_TIMESTAMP)
 * and any passed file descriptors (SCM_RIGHTS) out of the control
 * messages, and forwards the payload to the handler that belongs to
 * the socket. userdata is the Server object. A recvmsg() failure other
 * than EINTR/EAGAIN is returned via log_error_errno(). */
1286 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1287 Server
*s
= userdata
;
1288 struct ucred
*ucred
= NULL
;
1289 struct timeval
*tv
= NULL
;
1290 struct cmsghdr
*cmsg
;
1292 size_t label_len
= 0, m
;
1295 int *fds
= NULL
, v
= 0;
1299 struct cmsghdr cmsghdr
;
1301 /* We use NAME_MAX space for the SELinux label
1302 * here. The kernel currently enforces no
1303 * limit, but according to suggestions from
1304 * the SELinux people this will change and it
1305 * will probably be identical to NAME_MAX. For
1306 * now we use that, but this should be updated
1307 * one day when the final limit is known. */
1308 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1309 CMSG_SPACE(sizeof(struct timeval
)) +
1310 CMSG_SPACE(sizeof(int)) + /* fd */
1311 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1314 union sockaddr_union sa
= {};
1316 struct msghdr msghdr
= {
1319 .msg_control
= &control
,
1320 .msg_controllen
= sizeof(control
),
1322 .msg_namelen
= sizeof(sa
),
1326 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
/* Anything but a plain EPOLLIN is reported as an error. */
1328 if (revents
!= EPOLLIN
) {
1329 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1333 /* Try to get the right size, if we can. (Not all
1334 * sockets support SIOCINQ, hence we just try, but
1335 * don't rely on it. */
1336 (void) ioctl(fd
, SIOCINQ
, &v
);
1338 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1339 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1341 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
/* s->buffer is grown to at least m bytes and reused across calls. */
1343 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1346 iovec
.iov_base
= s
->buffer
;
1347 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1349 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1351 if (errno
== EINTR
|| errno
== EAGAIN
)
1354 return log_error_errno(errno
, "recvmsg() failed: %m");
/* Walk the control messages. Credentials and timestamp are accepted
 * only when cmsg_len matches the expected structure size exactly. */
1357 CMSG_FOREACH(cmsg
, &msghdr
) {
1359 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1360 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1361 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1362 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1363 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1364 cmsg
->cmsg_type
== SCM_SECURITY
) {
1365 label
= (char*) CMSG_DATA(cmsg
);
1366 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1367 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1368 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1369 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1370 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1371 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1372 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1373 fds
= (int*) CMSG_DATA(cmsg
);
1374 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1378 /* And a trailing NUL, just in case */
/* Dispatch by socket. The syslog and audit handlers are called only
 * for a non-empty payload without descriptors; the native socket
 * additionally accepts an empty payload with exactly one descriptor. */
1381 if (fd
== s
->syslog_fd
) {
1382 if (n
> 0 && n_fds
== 0)
1383 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1385 log_warning("Got file descriptors via syslog socket. Ignoring.");
1387 } else if (fd
== s
->native_fd
) {
1388 if (n
> 0 && n_fds
== 0)
1389 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1390 else if (n
== 0 && n_fds
== 1)
1391 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1393 log_warning("Got too many file descriptors via native socket. Ignoring.");
1396 assert(fd
== s
->audit_fd
);
1398 if (n
> 0 && n_fds
== 0)
1399 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1401 log_warning("Got file descriptors via audit socket. Ignoring.");
/* Descriptors received via SCM_RIGHTS are closed here. */
1404 close_many(fds
, n_fds
);
/* Signal callback registered for SIGUSR1 in setup_signals(). Logs the
 * requesting PID, flushes the runtime journal via server_flush_to_var()
 * (result ignored), vacuums, touches /run/systemd/journal/flushed and
 * emits a space usage message. userdata is the Server object. */
1408 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1409 Server
*s
= userdata
;
1414 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1416 (void) server_flush_to_var(s
);
1418 server_vacuum(s
, false);
/* A failure to create the flag file is only logged as a warning. */
1420 r
= touch("/run/systemd/journal/flushed");
1422 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1424 server_space_usage_message(s
, NULL
);
/* Signal callback registered for SIGUSR2 in setup_signals(). Logs the
 * rotation request, calls server_vacuum(s, true), applies
 * patch_min_use() to the storage of every journal that is currently
 * open and records the time in /run/systemd/journal/rotated. The
 * rotation call itself is presumably on a line not visible here.
 * userdata is the Server object. */
1428 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1429 Server
*s
= userdata
;
1434 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1436 server_vacuum(s
, true);
1438 if (s
->system_journal
)
1439 patch_min_use(&s
->system_storage
);
1440 if (s
->runtime_journal
)
1441 patch_min_use(&s
->runtime_storage
);
1443 /* Let clients know when the most recent rotation happened. */
1444 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1446 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Signal callback registered for both SIGTERM and SIGINT in
 * setup_signals(). Logs the received signal at LOG_INFO and asks the
 * event loop to exit with code 0. userdata is the Server object. */
1451 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1452 Server
*s
= userdata
;
1456 log_received_signal(LOG_INFO
, si
);
1458 sd_event_exit(s
->event
, 0);
/* Signal callback registered for SIGRTMIN+1 in setup_signals(). Logs
 * the sync request and records the time in
 * /run/systemd/journal/synced; the sync call itself is presumably on a
 * line not visible here. userdata is the Server object. */
1462 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1463 Server
*s
= userdata
;
1468 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1472 /* Let clients know when the most recent sync happened. */
1473 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1475 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs the process signal mask and registers event loop signal
 * sources for SIGUSR1, SIGUSR2, SIGTERM, SIGINT and SIGRTMIN+1. The
 * SIGTERM and SIGINT sources share dispatch_sigterm() and run at
 * SD_EVENT_PRIORITY_NORMAL+20; the SIGRTMIN+1 source runs at
 * SD_EVENT_PRIORITY_NORMAL+15. */
1480 static int setup_signals(Server
*s
) {
/* NOTE(review): sigprocmask_many() is invoked inside assert(). If
 * assert() expands to nothing in NDEBUG builds of this project, the
 * mask is never installed, while sd_event_add_signal() expects the
 * signals to be blocked. Consider a side-effect-safe assertion such
 * as assert_se(); confirm against the assert definition in use. */
1485 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1487 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1491 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1495 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1499 /* Let's process SIGTERM late, so that we flush all queued
1500 * messages to disk before we exit */
1501 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1505 /* When journald is invoked on the terminal (when debugging),
1506 * it's useful if C-c is handled equivalent to SIGTERM. */
1507 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1511 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1515 /* SIGRTMIN+1 causes an immediate sync. We process this very
1516 * late, so that everything else queued at this point is
1517 * really written to disk. Clients can watch
1518 * /run/systemd/journal/synced with inotify until its mtime
1519 * changes to see when a sync happened. */
1520 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1524 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Callback handed to parse_proc_cmdline() by server_init(); invoked
 * with one key/value pair from the kernel command line.
 *
 * systemd.journald.forward_to_{syslog,kmsg,console,wall} are boolean
 * switches; a key given without a value counts as true.
 * systemd.journald.max_level_{console,store,syslog,kmsg,wall} are log
 * level names and are matched only when a value is present. A
 * max_level_* key without a value therefore falls through to the final
 * startswith() branch and is reported as an unknown option, as is any
 * other key beginning with systemd.journald. Parse failures are logged
 * as warnings. */
1531 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1537 if (streq(key
, "systemd.journald.forward_to_syslog")) {
1538 r
= value
? parse_boolean(value
) : true;
1540 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1542 s
->forward_to_syslog
= r
;
1543 } else if (streq(key
, "systemd.journald.forward_to_kmsg")) {
1544 r
= value
? parse_boolean(value
) : true;
1546 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1548 s
->forward_to_kmsg
= r
;
1549 } else if (streq(key
, "systemd.journald.forward_to_console")) {
1550 r
= value
? parse_boolean(value
) : true;
1552 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1554 s
->forward_to_console
= r
;
1555 } else if (streq(key
, "systemd.journald.forward_to_wall")) {
1556 r
= value
? parse_boolean(value
) : true;
1558 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1560 s
->forward_to_wall
= r
;
1561 } else if (streq(key
, "systemd.journald.max_level_console") && value
) {
1562 r
= log_level_from_string(value
);
1564 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1566 s
->max_level_console
= r
;
1567 } else if (streq(key
, "systemd.journald.max_level_store") && value
) {
1568 r
= log_level_from_string(value
);
1570 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1572 s
->max_level_store
= r
;
1573 } else if (streq(key
, "systemd.journald.max_level_syslog") && value
) {
1574 r
= log_level_from_string(value
);
1576 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1578 s
->max_level_syslog
= r
;
1579 } else if (streq(key
, "systemd.journald.max_level_kmsg") && value
) {
1580 r
= log_level_from_string(value
);
1582 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1584 s
->max_level_kmsg
= r
;
1585 } else if (streq(key
, "systemd.journald.max_level_wall") && value
) {
1586 r
= log_level_from_string(value
);
1588 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1590 s
->max_level_wall
= r
;
1591 } else if (startswith(key
, "systemd.journald"))
1592 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1594 /* do not warn about state here, since probably systemd already did */
/* Reads the journald configuration: the main file
 * PKGSYSCONFDIR "/journald.conf" plus the systemd/journald.conf.d
 * drop-in directories, with journald_gperf_lookup as the item table.
 * Returns whatever config_parse_many_nulstr() returns. The remaining
 * arguments of the call are on lines not visible here. */
1598 static int server_parse_config_file(Server
*s
) {
1601 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1602 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1604 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback that server_schedule_sync() registers with
 * sd_event_add_time(). userdata is the Server object; the rest of the
 * body is on lines not visible here. */
1608 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1609 Server
*s
= userdata
;
/* Arranges for the journal files to be synced to disk. Entries with a
 * priority value at or below LOG_CRIT take the immediate path. In all
 * other cases, unless s->sync_scheduled is already set, a timer is
 * armed at the current event loop time plus s->sync_interval_usec and
 * s->sync_scheduled is set. A sync_interval_usec of 0 skips the timer
 * setup. */
1617 int server_schedule_sync(Server
*s
, int priority
) {
1622 if (priority
<= LOG_CRIT
) {
1623 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1628 if (s
->sync_scheduled
)
1631 if (s
->sync_interval_usec
> 0) {
1634 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1638 when
+= s
->sync_interval_usec
;
/* The timer source is created on first use and kept in
 * s->sync_event_source; later calls only move its deadline. */
1640 if (!s
->sync_event_source
) {
1641 r
= sd_event_add_time(
1643 &s
->sync_event_source
,
1646 server_dispatch_sync
, s
);
1650 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1652 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1656 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1661 s
->sync_scheduled
= true;
/* I/O callback that server_open_hostname() attaches to the hostname
 * file descriptor. Refreshes the cached hostname through
 * server_cache_hostname(). userdata is the Server object. */
1667 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1668 Server
*s
= userdata
;
1672 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname read-only and non-blocking, stores
 * the descriptor in s->hostname_fd and registers it with the event loop
 * (event mask 0) with dispatch_hostname_change() as callback. One
 * registration failure case is tolerated: it is logged as a warning and
 * the descriptor is closed again; the condition that selects this case
 * is on a line not visible here. Any other registration failure and a
 * failure to set the source priority (SD_EVENT_PRIORITY_IMPORTANT-10)
 * are returned via log_error_errno(). */
1676 static int server_open_hostname(Server
*s
) {
1681 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1682 if (s
->hostname_fd
< 0)
1683 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1685 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1687 /* kernels prior to 3.2 don't support polling this file. Ignore
1690 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1691 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1695 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1698 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1700 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (s->notify_fd), registered
 * by server_connect_notify(). Each invocation sends at most one item,
 * chosen in this order: the initial ready message (until
 * s->sent_notify_ready is set), a pending watchdog message
 * (s->send_watchdog), or one queued stdout stream notification. Both
 * send() calls use MSG_DONTWAIT; errors are returned via
 * log_error_errno(), with EAGAIN tested separately. When neither a
 * watchdog message nor a stream notification remains pending, the
 * event source is switched off. userdata is the Server object. */
1705 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1706 Server
*s
= userdata
;
1710 assert(s
->notify_event_source
== es
);
1711 assert(s
->notify_fd
== fd
);
1713 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1714 * message on it. Either it's the watchdog event, the initial
1715 * READY=1 event or an stdout stream event. If there's nothing
1716 * to write anymore, turn our event source off. The next time
1717 * there's something to send it will be turned on again. */
1719 if (!s
->sent_notify_ready
) {
1720 static const char p
[] =
1722 "STATUS=Processing requests...";
1725 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1727 if (errno
== EAGAIN
)
1730 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1733 s
->sent_notify_ready
= true;
1734 log_debug("Sent READY=1 notification.");
1736 } else if (s
->send_watchdog
) {
1738 static const char p
[] =
1743 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1745 if (errno
== EAGAIN
)
1748 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1751 s
->send_watchdog
= false;
1752 log_debug("Sent WATCHDOG=1 notification.");
1754 } else if (s
->stdout_streams_notify_queue
)
1755 /* Dispatch one stream notification event */
1756 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1758 /* Leave us enabled if there's still more to do. */
1759 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1762 /* There was nothing to do anymore, let's turn ourselves off. */
1763 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1765 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback for s->watchdog_event_source, registered by
 * server_connect_notify(). Marks a watchdog message as pending, turns
 * the notify event source on so that dispatch_notify_event() gets to
 * send it, and re-arms this timer at usec + s->watchdog_usec / 2,
 * where usec is the time value handed to the callback. A failure to
 * turn on the notify source is only logged; failures to re-arm or
 * re-enable the timer are returned. userdata is the Server object. */
1770 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1771 Server
*s
= userdata
;
1776 s
->send_watchdog
= true;
1778 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1780 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1782 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1784 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1786 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1788 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking AF_UNIX datagram socket to the address named
 * by the NOTIFY_SOCKET environment variable and registers it with the
 * event loop for EPOLLOUT, with dispatch_notify_event() as callback.
 * The value must start with @ or / and be longer than one character,
 * and must not be longer than sun_path. When sd_watchdog_enabled()
 * reports an active watchdog, a timer driving dispatch_watchdog() is
 * added as well, first firing after half the watchdog interval with an
 * accuracy of a quarter of it. Must be called while s->notify_fd is
 * negative and s->notify_event_source is unset (asserted). Failures of
 * socket(), connect() and the event source registrations are returned
 * via log_error_errno(). */
1793 static int server_connect_notify(Server
*s
) {
1794 union sockaddr_union sa
= {
1795 .un
.sun_family
= AF_UNIX
,
1801 assert(s
->notify_fd
< 0);
1802 assert(!s
->notify_event_source
);
1805 So here's the problem: we'd like to send notification
1806 messages to PID 1, but we cannot do that via sd_notify(),
1807 since that's synchronous, and we might end up blocking on
1808 it. Specifically: given that PID 1 might block on
1809 dbus-daemon during IPC, and dbus-daemon is logging to us,
1810 and might hence block on us, we might end up in a deadlock
1811 if we block on sending PID 1 notification messages — by
1812 generating a full blocking circle. To avoid this, let's
1813 create a non-blocking socket, and connect it to the
1814 notification socket, and then wait for POLLOUT before we
1815 send anything. This should efficiently avoid any deadlocks,
1816 as we'll never block on PID 1, hence PID 1 can safely block
1817 on dbus-daemon which can safely block on us again.
1819 Don't think that this issue is real? It is, see:
1820 https://github.com/systemd/systemd/issues/1505
1823 e
= getenv("NOTIFY_SOCKET");
1827 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1828 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1832 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1833 log_error("NOTIFY_SOCKET path too long: %s", e
);
1837 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1838 if (s
->notify_fd
< 0)
1839 return log_error_errno(errno
, "Failed to create notify socket: %m");
1841 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
/* A leading @ is replaced by a NUL byte, which is how unix(7) spells
 * an abstract socket address. NOTE(review): the length check above
 * admits strlen(e) == sizeof(sun_path), in which case strncpy() leaves
 * sun_path without a terminating NUL; confirm that SOCKADDR_UN_LEN()
 * copes with an unterminated path. */
1843 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1844 if (sa
.un
.sun_path
[0] == '@')
1845 sa
.un
.sun_path
[0] = 0;
1847 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1849 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1851 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1853 return log_error_errno(r
, "Failed to watch notification socket: %m");
1855 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1856 s
->send_watchdog
= true;
1858 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1860 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1863 /* This should fire pretty soon, which we'll use to send the
/* Initialises a Server object. In order: resets all socket descriptors
 * to -1 and fills in the built-in defaults, applies the configuration
 * files and the kernel command line, creates the journal bookkeeping
 * containers and the event loop, adopts the file descriptors passed in
 * through socket activation, opens whichever sockets and devices are
 * still missing, sets up signal handling, rate limiting and the cached
 * identity data, computes the runtime and system journal directories,
 * connects the notification socket (best effort) and finally opens the
 * journal via system_journal_open(), whose result is returned. */
1869 int server_init(Server
*s
) {
1870 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1877 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1881 s
->watchdog_usec
= USEC_INFINITY
;
1883 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1884 s
->sync_scheduled
= false;
1886 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1887 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1889 s
->forward_to_wall
= true;
1891 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1893 s
->max_level_store
= LOG_DEBUG
;
1894 s
->max_level_syslog
= LOG_DEBUG
;
1895 s
->max_level_kmsg
= LOG_NOTICE
;
1896 s
->max_level_console
= LOG_INFO
;
1897 s
->max_level_wall
= LOG_EMERG
;
1899 journal_reset_metrics(&s
->system_storage
.metrics
);
1900 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* The return values of both parsers are not examined here, so the
 * defaults above stay in place for anything they do not set. */
1902 server_parse_config_file(s
);
1903 parse_proc_cmdline(parse_proc_cmdline_item
, s
);
/* If exactly one of interval and burst is zero, both are zeroed. */
1905 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1906 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1907 s
->rate_limit_interval
, s
->rate_limit_burst
);
1908 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1911 (void) mkdir_p("/run/systemd/journal", 0755);
1913 s
->user_journals
= ordered_hashmap_new(NULL
);
1914 if (!s
->user_journals
)
1917 s
->mmap
= mmap_cache_new();
1921 s
->deferred_closes
= set_new(NULL
);
1922 if (!s
->deferred_closes
)
1925 r
= sd_event_default(&s
->event
);
1927 return log_error_errno(r
, "Failed to create event loop: %m");
1929 n
= sd_listen_fds(true);
1931 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify every passed descriptor by socket type and bound path. A
 * second descriptor of an already seen kind is reported as an error;
 * the final fdset_put() call collects descriptors into fds. */
1933 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1935 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1937 if (s
->native_fd
>= 0) {
1938 log_error("Too many native sockets passed.");
1944 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1946 if (s
->stdout_fd
>= 0) {
1947 log_error("Too many stdout sockets passed.");
1953 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1954 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1956 if (s
->syslog_fd
>= 0) {
1957 log_error("Too many /dev/log sockets passed.");
1963 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1965 if (s
->audit_fd
>= 0) {
1966 log_error("Too many audit sockets passed.");
1980 r
= fdset_put(fds
, fd
);
1986 /* Try to restore streams, but don't bother if this fails */
1987 (void) server_restore_streams(s
, fds
);
1989 if (fdset_size(fds
) > 0) {
1990 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1991 fds
= fdset_free(fds
);
/* Recorded before the open calls below assign any descriptors; used
 * for the audit socket decision further down. */
1994 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1996 /* always open stdout, syslog, native, and kmsg sockets */
1998 /* systemd-journald.socket: /run/systemd/journal/stdout */
1999 r
= server_open_stdout_socket(s
);
2003 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2004 r
= server_open_syslog_socket(s
);
2008 /* systemd-journald.socket: /run/systemd/journal/socket */
2009 r
= server_open_native_socket(s
);
2014 r
= server_open_dev_kmsg(s
);
2018 /* Unless we got *some* sockets and not audit, open audit socket */
2019 if (s
->audit_fd
>= 0 || no_sockets
) {
2020 r
= server_open_audit(s
);
2025 r
= server_open_kernel_seqnum(s
);
2029 r
= server_open_hostname(s
);
2033 r
= setup_signals(s
);
2037 s
->udev
= udev_new();
2041 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
2045 r
= cg_get_root_path(&s
->cgroup_root
);
2049 server_cache_hostname(s
);
2050 server_cache_boot_id(s
);
2051 server_cache_machine_id(s
);
2053 s
->runtime_storage
.name
= "Runtime journal";
2054 s
->system_storage
.name
= "System journal";
/* Both journal directories are named after SERVER_MACHINE_ID(s). */
2056 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
), NULL
);
2057 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
), NULL
);
2058 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2061 (void) server_connect_notify(s
);
2063 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current
 * CLOCK_REALTIME timestamp on the system journal, if one is open, and
 * on every file in s->user_journals. The runtime journal is not
 * touched by the lines visible here. */
2066 void server_maybe_append_tags(Server
*s
) {
/* One timestamp is taken up front and shared by all files. */
2072 n
= now(CLOCK_REALTIME
);
2074 if (s
->system_journal
)
2075 journal_file_maybe_append_tag(s
->system_journal
, n
);
2077 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2078 journal_file_maybe_append_tag(f
, n
);
/* Releases the resources held by a Server object: deferred journal
 * closes, stdout streams, the system, runtime and per-user journal
 * files, all event sources and the event loop, every socket and device
 * descriptor, the rate limiter, the kernel sequence number mapping,
 * heap-allocated strings, the mmap cache and the udev context. */
2082 void server_done(Server
*s
) {
2086 if (s
->deferred_closes
) {
2087 journal_file_close_set(s
->deferred_closes
);
2088 set_free(s
->deferred_closes
);
/* stdout_stream_free() is expected to unlink the stream from
 * s->stdout_streams, otherwise this loop would not terminate; confirm
 * against its definition. */
2091 while (s
->stdout_streams
)
2092 stdout_stream_free(s
->stdout_streams
);
2094 if (s
->system_journal
)
2095 (void) journal_file_close(s
->system_journal
);
2097 if (s
->runtime_journal
)
2098 (void) journal_file_close(s
->runtime_journal
);
/* Drain the per-user journals one by one before the map is freed. */
2100 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
2101 (void) journal_file_close(f
);
2103 ordered_hashmap_free(s
->user_journals
);
2105 sd_event_source_unref(s
->syslog_event_source
);
2106 sd_event_source_unref(s
->native_event_source
);
2107 sd_event_source_unref(s
->stdout_event_source
);
2108 sd_event_source_unref(s
->dev_kmsg_event_source
);
2109 sd_event_source_unref(s
->audit_event_source
);
2110 sd_event_source_unref(s
->sync_event_source
);
2111 sd_event_source_unref(s
->sigusr1_event_source
);
2112 sd_event_source_unref(s
->sigusr2_event_source
);
2113 sd_event_source_unref(s
->sigterm_event_source
);
2114 sd_event_source_unref(s
->sigint_event_source
);
2115 sd_event_source_unref(s
->sigrtmin1_event_source
);
2116 sd_event_source_unref(s
->hostname_event_source
);
2117 sd_event_source_unref(s
->notify_event_source
);
2118 sd_event_source_unref(s
->watchdog_event_source
);
2119 sd_event_unref(s
->event
);
2121 safe_close(s
->syslog_fd
);
2122 safe_close(s
->native_fd
);
2123 safe_close(s
->stdout_fd
);
2124 safe_close(s
->dev_kmsg_fd
);
2125 safe_close(s
->audit_fd
);
2126 safe_close(s
->hostname_fd
);
2127 safe_close(s
->notify_fd
);
2130 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with a length of one uint64_t. */
2132 if (s
->kernel_seqnum
)
2133 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2137 free(s
->cgroup_root
);
2138 free(s
->hostname_field
);
2141 mmap_cache_unref(s
->mmap
);
2143 udev_unref(s
->udev
);
/* Textual names of the Storage enum values, indexed by enum value.
 * The two macro invocations that follow presumably generate the
 * string conversion helpers and the config_parse_storage() parser from
 * this table; confirm against the macro definitions. */
2146 static const char* const storage_table
[_STORAGE_MAX
] = {
2147 [STORAGE_AUTO
] = "auto",
2148 [STORAGE_VOLATILE
] = "volatile",
2149 [STORAGE_PERSISTENT
] = "persistent",
2150 [STORAGE_NONE
] = "none"
2153 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2154 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Textual names of the SplitMode enum values, indexed by enum value.
 * The two macro invocations that follow presumably generate the
 * string conversion helpers and the config_parse_split_mode() parser
 * from this table; confirm against the macro definitions. */
2156 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2157 [SPLIT_LOGIN
] = "login",
2158 [SPLIT_UID
] = "uid",
2159 [SPLIT_NONE
] = "none",
2162 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2163 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");