2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "format-util.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
/* Cap on the number of per-user journal files kept in s->user_journals:
 * find_journal() steals and closes entries while the map holds at least
 * this many. */
76 #define USER_JOURNALS_MAX 1024
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* Added to the cached space timestamp in cache_space_refresh() and compared
 * with the current monotonic time; presumably the cached figures are reused
 * while they are younger than this - TODO confirm. */
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
/* Measures the journal directory 'path'.
 *
 * *ret_free receives f_bsize * f_bavail as reported by fstatvfs() on the
 * directory. *ret_used is increased by st_blocks * 512 for directory entries
 * that are checked against the ".journal" / ".journal~" suffixes and against
 * S_ISREG().
 *
 * A directory that cannot be opened is logged at LOG_DEBUG when errno is
 * ENOENT and at LOG_ERR otherwise, and the log call's result is returned. A
 * failing fstatvfs() is logged as an error and returned as well. An entry
 * that cannot be stat'ed is only logged at debug level.
 *
 * NOTE(review): presumably entries failing the suffix, fstatat() or
 * S_ISREG() checks are skipped and *ret_used starts at zero - confirm. */
90 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
91 _cleanup_closedir_
DIR *d
= NULL
;
100 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
101 errno
, "Failed to open %s: %m", path
);
103 if (fstatvfs(dirfd(d
), &ss
) < 0)
104 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
/* Free space on the file system: block size times available block count. */
106 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
108 FOREACH_DIRENT_ALL(de
, d
, break) {
111 if (!endswith(de
->d_name
, ".journal") &&
112 !endswith(de
->d_name
, ".journal~"))
115 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
116 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
120 if (!S_ISREG(st
.st_mode
))
/* Accumulate the allocated size: block count times 512 bytes. */
123 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
/* Drops the cached space figures by zeroing the whole JournalStorageSpace
 * record. The timestamp becomes 0 as well, which makes the freshness test in
 * cache_space_refresh() ("timestamp != 0 && ...") evaluate to false. */
129 static void cache_space_invalidate(JournalStorageSpace
*space
) {
130 memset(space
, 0, sizeof(*space
));
/* Recomputes the cached space accounting of 'storage' (storage->space).
 *
 * The current CLOCK_MONOTONIC time is compared with the cached timestamp plus
 * RECHECK_SPACE_USEC. The directory storage->path is then measured with
 * determine_path_usage() and the following values are stored:
 *
 *   vfs_used      - bytes used by journal files in the directory
 *   vfs_available - free bytes on the file system
 *   limit         - MIN(MAX(vfs_used + avail, min_use), max_use), where
 *                   avail = LESS_BY(vfs_avail, keep_free)
 *   available     - LESS_BY(limit, vfs_used)
 *   timestamp     - the time taken at the start of the call
 *
 * NOTE(review): presumably the function returns early while the cache is
 * still fresh and when determine_path_usage() fails - confirm. */
133 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
134 JournalStorageSpace
*space
;
135 JournalMetrics
*metrics
;
136 uint64_t vfs_used
, vfs_avail
, avail
;
142 metrics
= &storage
->metrics
;
143 space
= &storage
->space
;
145 ts
= now(CLOCK_MONOTONIC
);
147 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
150 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
154 space
->vfs_used
= vfs_used
;
155 space
->vfs_available
= vfs_avail
;
/* Free space left once the keep_free reserve has been subtracted. */
157 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
159 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
160 space
->available
= LESS_BY(space
->limit
, vfs_used
);
161 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to the cached on-disk usage
 * (storage->space.vfs_used) when that usage is larger; min_use is never
 * lowered here. */
165 static void patch_min_use(JournalStorage
*storage
) {
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
174 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Reports the space figures of the storage area currently being written to:
 * the system storage when s->system_journal is set, the runtime storage
 * otherwise. The cache is refreshed through cache_space_refresh() and
 * space.available / space.limit are copied to *available / *limit.
 *
 * NOTE(review): server_dispatch_message() passes NULL for 'limit', so the
 * two stores are presumably guarded by NULL checks - confirm. */
178 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
184 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
186 r
= cache_space_refresh(s
, js
);
189 *available
= js
->space
.available
;
191 *limit
= js
->space
.limit
;
/* Emits a driver message (MESSAGE_ID SD_MESSAGE_JOURNAL_USAGE_STR) that
 * describes the space accounting of a storage area.
 *
 * The figures come from storage->space after a cache_space_refresh() and from
 * storage->metrics. Each value is written twice: as a raw number and, via
 * format_bytes(), as a human-readable "*_PRETTY" string. The buffers map as
 * fb1 = current use, fb2 = max_use, fb3 = keep_free, fb4 = file system free
 * space, fb5 = effective limit, fb6 = space still available.
 *
 * NOTE(review): 'storage' is reassigned from s->system_journal below,
 * presumably only when the caller passed NULL; a failing refresh presumably
 * suppresses the message - confirm. */
196 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
197 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
198 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
199 JournalMetrics
*metrics
;
204 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
206 if (cache_space_refresh(s
, storage
) < 0)
209 metrics
= &storage
->metrics
;
210 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
211 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
212 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
213 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
214 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
215 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
217 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
220 "JOURNAL_NAME=%s", storage
->name
,
221 "JOURNAL_PATH=%s", storage
->path
,
222 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
223 "CURRENT_USE_PRETTY=%s", fb1
,
224 "MAX_USE=%"PRIu64
, metrics
->max_use
,
225 "MAX_USE_PRETTY=%s", fb2
,
226 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
228 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
229 "DISK_AVAILABLE_PRETTY=%s", fb4
,
230 "LIMIT=%"PRIu64
, storage
->space
.limit
,
231 "LIMIT_PRETTY=%s", fb5
,
232 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
233 "AVAILABLE_PRETTY=%s", fb6
,
/* Applies the per-user ACL for 'uid' to the journal file 'f' by calling
 * add_acls_for_user() on f->fd. A failure is only logged as a warning
 * ("ignoring"); nothing is reported to the caller.
 *
 * NOTE(review): the uid <= SYSTEM_UID_MAX test presumably returns early so
 * that system users get no ACL - confirm. */
237 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
244 if (uid
<= SYSTEM_UID_MAX
)
247 r
= add_acls_for_user(f
->fd
, uid
);
249 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Opens the journal file 'fname' with file mode 0640, passing the server's
 * compression setting, mmap cache and deferred-close set along.
 *
 * Two open paths are visible: journal_file_open_reliably() and plain
 * journal_file_open(). After opening, the post-change timer is enabled on the
 * server's event loop with POST_CHANGE_TIMER_INTERVAL_USEC.
 *
 * NOTE(review): callers pass a boolean as second argument, which presumably
 * selects the "reliably" variant; the journal_file_close() call is presumably
 * the cleanup for a failed timer setup - confirm. */
253 static int open_journal(
259 JournalMetrics
*metrics
,
269 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
271 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
275 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
277 (void) journal_file_close(f
);
/* Returns true when the flag file /run/systemd/journal/flushed exists, i.e.
 * when access() with F_OK succeeds on it. */
285 static bool flushed_flag_is_set(void) {
286 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
/* Makes sure the journals the server should write to are open.
 *
 * System journal: attempted only when it is not open yet, the storage mode is
 * STORAGE_PERSISTENT or STORAGE_AUTO, and either 'flush_requested' is true or
 * the flushed flag file exists. In persistent mode /var/log/journal/ is
 * created first; the machine directory (s->system_storage.path) is created in
 * both modes. The file "<path>/system.journal" is opened with O_RDWR|O_CREAT.
 * The ACL, space-cache refresh and min_use patch steps follow the open; an
 * open error other than -ENOENT / -EROFS is logged as a warning. When
 * 'flush_requested' is false, server_flush_to_var(s, true) is invoked.
 *
 * Runtime journal: attempted when it is not open and the storage mode is not
 * STORAGE_NONE. If the system journal is open, the runtime file is opened
 * without O_CREAT (only if it already exists, so it can be flushed).
 * Otherwise /run/log, /run/log/journal and the parent directories are created
 * and the file is opened with O_CREAT; a failure there is logged as an error
 * and returned. An open runtime journal then gets the same ACL, cache refresh
 * and min_use patch treatment.
 *
 * NOTE(review): the exact nesting of these steps (which of them run only on
 * success) depends on conditions not shown here - confirm. */
289 static int system_journal_open(Server
*s
, bool flush_requested
) {
293 if (!s
->system_journal
&&
294 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
295 (flush_requested
|| flushed_flag_is_set())) {
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
303 if (s
->storage
== STORAGE_PERSISTENT
)
304 (void) mkdir_p("/var/log/journal/", 0755);
306 (void) mkdir(s
->system_storage
.path
, 0755);
308 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
309 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
311 server_add_acls(s
->system_journal
, 0);
312 (void) cache_space_refresh(s
, &s
->system_storage
);
313 patch_min_use(&s
->system_storage
);
315 if (r
!= -ENOENT
&& r
!= -EROFS
)
316 log_warning_errno(r
, "Failed to open system journal: %m");
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
328 if (!flush_requested
)
329 (void) server_flush_to_var(s
, true);
332 if (!s
->runtime_journal
&&
333 (s
->storage
!= STORAGE_NONE
)) {
335 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
337 if (s
->system_journal
) {
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
343 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
346 log_warning_errno(r
, "Failed to open runtime journal: %m");
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn
, 0750);
360 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
362 return log_error_errno(r
, "Failed to open runtime journal: %m");
365 if (s
->runtime_journal
) {
366 server_add_acls(s
->runtime_journal
, 0);
367 (void) cache_space_refresh(s
, &s
->runtime_storage
);
368 patch_min_use(&s
->runtime_storage
);
/* Picks the journal file that entries of user 'uid' are written to.
 *
 * Order of decisions, as far as visible:
 *   1. system_journal_open(s, false) is called to (re)open missing journals.
 *   2. An open runtime journal is returned unconditionally.
 *   3. UIDs up to SYSTEM_UID_MAX and dynamic UIDs get the system journal.
 *   4. The machine ID is read and s->user_journals is looked up for the UID.
 *   5. Otherwise the per-user file
 *      /var/log/journal/<machine-id>/user-<uid>.journal is opened with
 *      O_RDWR|O_CREAT, given an ACL for the user and stored in
 *      s->user_journals. Before opening, entries are stolen from the front of
 *      the map and closed while it holds USER_JOURNALS_MAX or more files.
 *
 * The system journal is returned when the path cannot be allocated.
 *
 * NOTE(review): the remaining "return s->system_journal" statements are
 * presumably the fallbacks for a failed machine-ID lookup, open or map
 * insertion, and a cached map entry is presumably returned directly -
 * confirm. */
375 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
376 _cleanup_free_
char *p
= NULL
;
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s
, false);
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
399 if (s
->runtime_journal
)
400 return s
->runtime_journal
;
402 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
403 return s
->system_journal
;
405 r
= sd_id128_get_machine(&machine
);
407 return s
->system_journal
;
409 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
413 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
414 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
415 return s
->system_journal
;
417 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
418 /* Too many open? Then let's close one */
419 f
= ordered_hashmap_steal_first(s
->user_journals
);
421 (void) journal_file_close(f
);
424 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
426 return s
->system_journal
;
428 server_add_acls(f
, uid
);
430 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
432 (void) journal_file_close(f
);
433 return s
->system_journal
;
/* Rotates the journal file referenced by 'f' through journal_file_rotate(),
 * passing the server's compression setting, the 'seal' flag and the
 * deferred-close set.
 *
 * Two error messages exist: one naming the path of the file that failed to
 * rotate, and one ("Failed to create new %s journal") naming the journal
 * kind. server_add_acls() is applied to the resulting file for 'uid'.
 *
 * NOTE(review): which message is logged presumably depends on whether *f is
 * still set after the failed rotate, and the ACL step presumably runs only on
 * success - confirm. */
439 static int do_rotate(
452 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
455 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
457 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
459 server_add_acls(*f
, uid
);
/* Rotates every journal the server has open.
 *
 * The runtime journal is rotated without sealing and the system journal with
 * s->seal, both for UID 0. Each entry of s->user_journals is then rotated
 * with the UID decoded from its map key; the map entry is either replaced
 * with the new file or removed. Finally, files in s->deferred_closes that are
 * no longer offlining are removed from the set and closed.
 *
 * NOTE(review): replace vs. remove presumably depends on do_rotate()
 * succeeding / the old file having been freed - confirm. */
464 void server_rotate(Server
*s
) {
470 log_debug("Rotating...");
472 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
473 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
475 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
476 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
478 ordered_hashmap_replace(s
->user_journals
, k
, f
);
480 /* Old file has been closed and deallocated */
481 ordered_hashmap_remove(s
->user_journals
, k
);
484 /* Perform any deferred closes which aren't still offlining. */
485 SET_FOREACH(f
, s
->deferred_closes
, i
)
486 if (!journal_file_is_offlining(f
)) {
487 (void) set_remove(s
->deferred_closes
, f
);
488 (void) journal_file_close(f
);
/* Syncs the persistent journals: journal_file_set_offline(..., false) is
 * called on the system journal, if open, and on every file in
 * s->user_journals. Failures are logged as warnings ("ignoring").
 *
 * If a sync timer source exists, it is switched to SD_EVENT_OFF (a failure is
 * logged as an error). s->sync_scheduled is cleared at the end. */
492 void server_sync(Server
*s
) {
497 if (s
->system_journal
) {
498 r
= journal_file_set_offline(s
->system_journal
, false);
500 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
503 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
504 r
= journal_file_set_offline(f
, false);
506 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
509 if (s
->sync_event_source
) {
510 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
512 log_error_errno(r
, "Failed to disable sync timer source: %m");
515 s
->sync_scheduled
= false;
/* Vacuums one storage area.
 *
 * The space cache is refreshed first so that storage->space.limit is current.
 * journal_directory_vacuum() is then run on storage->path with that limit,
 * the configured maximum file count and s->max_retention_usec;
 * s->oldest_file_usec is passed by pointer. Errors other than -ENOENT are
 * logged as warnings ("ignoring"). The cached space figures are invalidated
 * afterwards, since vacuuming may have changed the disk usage.
 *
 * NOTE(review): the usage message is presumably emitted only when 'verbose'
 * is set - confirm. */
518 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
525 (void) cache_space_refresh(s
, storage
);
528 server_space_usage_message(s
, storage
);
530 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
531 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
532 &s
->oldest_file_usec
, verbose
);
533 if (r
< 0 && r
!= -ENOENT
)
534 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
536 cache_space_invalidate(&storage
->space
);
/* Vacuums the storage areas of all open journals. s->oldest_file_usec is
 * reset to 0 first; do_vacuum() is then run on the system storage if the
 * system journal is open, and on the runtime storage if the runtime journal
 * is open. 'verbose' is handed through to do_vacuum(). */
539 int server_vacuum(Server
*s
, bool verbose
) {
542 log_debug("Vacuuming...");
544 s
->oldest_file_usec
= 0;
546 if (s
->system_journal
)
547 do_vacuum(s
, &s
->system_storage
, verbose
);
548 if (s
->runtime_journal
)
549 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Reads the machine ID with sd_id128_get_machine() and stores it in
 * s->machine_id_field as "_MACHINE_ID=<id>": stpcpy() writes the prefix and
 * returns the position right after it, where sd_id128_to_string() then writes
 * the ID.
 *
 * NOTE(review): presumably the field is left untouched when the ID cannot be
 * read - confirm. */
554 static void server_cache_machine_id(Server
*s
) {
560 r
= sd_id128_get_machine(&id
);
564 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Reads the boot ID with sd_id128_get_boot() and stores it in
 * s->boot_id_field as "_BOOT_ID=<id>": stpcpy() writes the prefix and returns
 * the position right after it, where sd_id128_to_string() then writes the ID.
 *
 * NOTE(review): presumably the field is left untouched when the ID cannot be
 * read - confirm. */
567 static void server_cache_boot_id(Server
*s
) {
573 r
= sd_id128_get_boot(&id
);
577 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Rebuilds the cached "_HOSTNAME=<name>" field. The host name is fetched with
 * gethostname_malloc() (released automatically via _cleanup_free_), the field
 * string is built with strappend(), and the previous s->hostname_field is
 * freed before the new string takes its place.
 *
 * NOTE(review): presumably the old field is kept when either allocation
 * fails - confirm. */
580 static void server_cache_hostname(Server
*s
) {
581 _cleanup_free_
char *t
= NULL
;
586 t
= gethostname_malloc();
590 x
= strappend("_HOSTNAME=", t
);
594 free(s
->hostname_field
);
595 s
->hostname_field
= x
;
/* Classifies the error 'r' (a negative errno value) that an append to journal
 * file 'f' produced, and logs the reason the file is going to be rotated.
 *
 * Recognized groups: allocation limits (-E2BIG, -EFBIG, -EDQUOT, -ENOSPC),
 * I/O error (-EIO), file from another machine (-EHOSTDOWN), unclean shutdown
 * (-EBUSY), unsupported feature (-EPROTONOSUPPORT), corruption (-EBADMSG,
 * -ENODATA, -ESHUTDOWN), deleted file (-EIDRM) and file from the future
 * (-ETXTBSY).
 *
 * NOTE(review): presumably each listed case returns true ("rotate and retry")
 * and every other error returns false - confirm. */
598 static bool shall_try_append_again(JournalFile
*f
, int r
) {
601 case -E2BIG
: /* Hit configured limit */
602 case -EFBIG
: /* Hit fs limit */
603 case -EDQUOT
: /* Quota limit hit */
604 case -ENOSPC
: /* Disk full */
605 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
608 case -EIO
: /* I/O error of some kind (mmap) */
609 log_warning("%s: IO error, rotating.", f
->path
);
612 case -EHOSTDOWN
: /* Other machine */
613 log_info("%s: Journal file from other machine, rotating.", f
->path
);
616 case -EBUSY
: /* Unclean shutdown */
617 log_info("%s: Unclean shutdown, rotating.", f
->path
);
620 case -EPROTONOSUPPORT
: /* Unsupported feature */
621 log_info("%s: Unsupported feature, rotating.", f
->path
);
624 case -EBADMSG
: /* Corrupted */
625 case -ENODATA
: /* Truncated */
626 case -ESHUTDOWN
: /* Already archived */
627 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
630 case -EIDRM
: /* Journal file has been deleted */
631 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
634 case -ETXTBSY
: /* Journal file is from the future */
635 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
/* Appends one entry, given as 'n' iovec elements, to the journal file that
 * find_journal() selects for 'uid'.
 *
 * Both timestamps of the entry are taken from the event loop (sd_event_now()).
 * A realtime clock that is behind s->last_realtime_clock, or a file for which
 * journal_file_rotate_suggested() is true, leads to the rotate/vacuum path
 * and a fresh find_journal() lookup before the write.
 *
 * After the first journal_file_append_entry() a sync is scheduled according
 * to 'priority'. If the write failed and shall_try_append_again() permits it
 * (and no vacuum has happened yet), the server vacuums, looks the file up
 * again and retries once; a second failure is logged and the entry dropped.
 *
 * NOTE(review): presumably the first server_schedule_sync() and the final one
 * are each reached only after a successful append, and the 'rotate' flag
 * triggers server_rotate() before the server_vacuum() call - confirm. */
643 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
644 bool vacuumed
= false, rotate
= false;
645 struct dual_timestamp ts
;
653 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
654 * the source time, and not even the time the event was originally seen, but instead simply the time we started
655 * processing it, as we want strictly linear ordering in what we write out.) */
656 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
657 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
659 if (ts
.realtime
< s
->last_realtime_clock
) {
660 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
661 * regular operation. However, when it does happen, then we should make sure that we start fresh files
662 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
663 * bisection works correctly. */
665 log_debug("Time jumped backwards, rotating.");
669 f
= find_journal(s
, uid
);
673 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
674 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
681 server_vacuum(s
, false);
684 f
= find_journal(s
, uid
);
689 s
->last_realtime_clock
= ts
.realtime
;
691 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
693 server_schedule_sync(s
, priority
);
697 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
698 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
703 server_vacuum(s
, false);
705 f
= find_journal(s
, uid
);
709 log_debug("Retrying write.");
710 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
712 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
714 server_schedule_sync(s
, priority
);
/* Looks up the invocation ID of 'unit' living in 'slice'.
 *
 * The cgroup path is assembled as <cgroup_root>/<slice path>/<escaped unit>,
 * using cg_slice_to_path() and cg_escape(). The ID is then read from the
 * "trusted.invocation_id" extended attribute of that cgroup into the local
 * buffer 'ids' (32 bytes are requested) and checked with id128_is_valid().
 *
 * NOTE(review): presumably a copy of the ID string is returned through *ret
 * on success and a negative errno-style value on failure - confirm. */
717 static int get_invocation_id(const char *cgroup_root
, const char *slice
, const char *unit
, char **ret
) {
718 _cleanup_free_
char *escaped
= NULL
, *slice_path
= NULL
, *p
= NULL
;
719 char *copy
, ids
[SD_ID128_STRING_MAX
];
722 /* Read the invocation ID of a unit. It's stored in the "trusted.invocation_id" extended attribute
723 * on the cgroup path. */
725 r
= cg_slice_to_path(slice
, &slice_path
);
729 escaped
= cg_escape(unit
);
733 p
= strjoin(cgroup_root
, "/", slice_path
, "/", escaped
);
737 r
= cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER
, p
, "trusted.invocation_id", ids
, 32);
744 if (!id128_is_valid(ids
))
/* Completes a log entry with trusted metadata and writes it out.
 *
 * 'iovec' holds 'n' fields supplied by the caller and has room for 'm'
 * elements in total; the assert below requires space for N_IOVEC_META_FIELDS
 * more fields, plus N_IOVEC_OBJECT_FIELDS when object_pid > 0. Fields are
 * appended in this order:
 *
 *   - from 'ucred': _PID, _UID, _GID, then _COMM, _EXE, _CMDLINE,
 *     _CAP_EFFECTIVE, _AUDIT_SESSION and _AUDIT_LOGINUID looked up by PID
 *   - from the sender's cgroup: _SYSTEMD_CGROUP, _SYSTEMD_SESSION,
 *     _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT, _SYSTEMD_USER_UNIT, _SYSTEMD_SLICE,
 *     _SYSTEMD_USER_SLICE, _SYSTEMD_INVOCATION_ID; 'unit_id' serves as
 *     fallback for the unit fields
 *   - _SELINUX_CONTEXT, from 'label' or from getpidcon()
 *   - the OBJECT_* counterparts, looked up for 'object_pid'
 *   - _SOURCE_REALTIME_TIMESTAMP from 'tv'
 *   - the cached boot ID, machine ID and host name fields, when non-empty
 *
 * Finally the target journal UID is chosen from s->split_mode (the real UID
 * for SPLIT_UID, the session owner for SPLIT_LOGIN, in both cases only for
 * non-root senders) and the entry is handed to write_to_journal().
 *
 * NOTE(review): each lookup is presumably skipped silently when it fails, the
 * ucred-based section presumably runs only when 'ucred' is non-NULL (the
 * driver path passes NULL for 'tv' and 'label'), and journal_uid presumably
 * falls back to 0 - confirm. */
755 static void dispatch_message_real(
757 struct iovec
*iovec
, unsigned n
, unsigned m
,
758 const struct ucred
*ucred
,
759 const struct timeval
*tv
,
760 const char *label
, size_t label_len
,
766 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
767 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
768 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
769 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
770 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
771 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
772 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
773 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
779 uid_t realuid
= 0, owner
= 0, journal_uid
;
780 bool owner_valid
= false;
782 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
784 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
/* The caller must have left room for every field that may be appended. */
794 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
> 0 ? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
797 realuid
= ucred
->uid
;
799 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
800 IOVEC_SET_STRING(iovec
[n
++], pid
);
802 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
803 IOVEC_SET_STRING(iovec
[n
++], uid
);
805 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
806 IOVEC_SET_STRING(iovec
[n
++], gid
);
808 r
= get_process_comm(ucred
->pid
, &t
);
810 x
= strjoina("_COMM=", t
);
812 IOVEC_SET_STRING(iovec
[n
++], x
);
815 r
= get_process_exe(ucred
->pid
, &t
);
817 x
= strjoina("_EXE=", t
);
819 IOVEC_SET_STRING(iovec
[n
++], x
);
822 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
824 x
= strjoina("_CMDLINE=", t
);
826 IOVEC_SET_STRING(iovec
[n
++], x
);
829 r
= get_process_capeff(ucred
->pid
, &t
);
831 x
= strjoina("_CAP_EFFECTIVE=", t
);
833 IOVEC_SET_STRING(iovec
[n
++], x
);
837 r
= audit_session_from_pid(ucred
->pid
, &audit
);
839 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
840 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
843 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
845 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
846 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
854 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
857 _cleanup_free_
char *raw_unit
= NULL
, *raw_slice
= NULL
;
858 char *session
= NULL
;
860 x
= strjoina("_SYSTEMD_CGROUP=", c
);
861 IOVEC_SET_STRING(iovec
[n
++], x
);
863 r
= cg_path_get_session(c
, &t
);
865 session
= strjoina("_SYSTEMD_SESSION=", t
);
867 IOVEC_SET_STRING(iovec
[n
++], session
);
870 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
873 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
874 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
877 if (cg_path_get_unit(c
, &raw_unit
) >= 0) {
878 x
= strjoina("_SYSTEMD_UNIT=", raw_unit
);
879 IOVEC_SET_STRING(iovec
[n
++], x
);
880 } else if (unit_id
&& !session
) {
881 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
882 IOVEC_SET_STRING(iovec
[n
++], x
);
885 if (cg_path_get_user_unit(c
, &t
) >= 0) {
886 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
888 IOVEC_SET_STRING(iovec
[n
++], x
);
889 } else if (unit_id
&& session
) {
890 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
891 IOVEC_SET_STRING(iovec
[n
++], x
);
894 if (cg_path_get_slice(c
, &raw_slice
) >= 0) {
895 x
= strjoina("_SYSTEMD_SLICE=", raw_slice
);
896 IOVEC_SET_STRING(iovec
[n
++], x
);
899 if (cg_path_get_user_slice(c
, &t
) >= 0) {
900 x
= strjoina("_SYSTEMD_USER_SLICE=", t
);
902 IOVEC_SET_STRING(iovec
[n
++], x
);
905 if (raw_slice
&& raw_unit
) {
906 if (get_invocation_id(s
->cgroup_root
, raw_slice
, raw_unit
, &t
) >= 0) {
907 x
= strjoina("_SYSTEMD_INVOCATION_ID=", t
);
909 IOVEC_SET_STRING(iovec
[n
++], x
);
915 } else if (unit_id
) {
916 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
917 IOVEC_SET_STRING(iovec
[n
++], x
);
921 if (mac_selinux_have()) {
923 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
925 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
926 IOVEC_SET_STRING(iovec
[n
++], x
);
930 if (getpidcon(ucred
->pid
, &con
) >= 0) {
931 x
= strjoina("_SELINUX_CONTEXT=", con
);
934 IOVEC_SET_STRING(iovec
[n
++], x
);
943 r
= get_process_uid(object_pid
, &object_uid
);
945 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
946 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
949 r
= get_process_gid(object_pid
, &object_gid
);
951 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
952 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
955 r
= get_process_comm(object_pid
, &t
);
957 x
= strjoina("OBJECT_COMM=", t
);
959 IOVEC_SET_STRING(iovec
[n
++], x
);
962 r
= get_process_exe(object_pid
, &t
);
964 x
= strjoina("OBJECT_EXE=", t
);
966 IOVEC_SET_STRING(iovec
[n
++], x
);
969 r
= get_process_cmdline(object_pid
, 0, false, &t
);
971 x
= strjoina("OBJECT_CMDLINE=", t
);
973 IOVEC_SET_STRING(iovec
[n
++], x
);
977 r
= audit_session_from_pid(object_pid
, &audit
);
979 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
980 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
983 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
985 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
986 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
990 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
992 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
993 IOVEC_SET_STRING(iovec
[n
++], x
);
995 r
= cg_path_get_session(c
, &t
);
997 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
999 IOVEC_SET_STRING(iovec
[n
++], x
);
1002 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
1003 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
1004 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
1007 if (cg_path_get_unit(c
, &t
) >= 0) {
1008 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
1010 IOVEC_SET_STRING(iovec
[n
++], x
);
1013 if (cg_path_get_user_unit(c
, &t
) >= 0) {
1014 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
1016 IOVEC_SET_STRING(iovec
[n
++], x
);
1019 if (cg_path_get_slice(c
, &t
) >= 0) {
1020 x
= strjoina("OBJECT_SYSTEMD_SLICE=", t
);
1022 IOVEC_SET_STRING(iovec
[n
++], x
);
1025 if (cg_path_get_user_slice(c
, &t
) >= 0) {
1026 x
= strjoina("OBJECT_SYSTEMD_USER_SLICE=", t
);
1028 IOVEC_SET_STRING(iovec
[n
++], x
);
1037 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
1038 IOVEC_SET_STRING(iovec
[n
++], source_time
);
1041 /* Note that strictly speaking storing the boot id here is
1042 * redundant since the entry includes this in-line
1043 * anyway. However, we need this indexed, too. */
1044 if (!isempty(s
->boot_id_field
))
1045 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
1047 if (!isempty(s
->machine_id_field
))
1048 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
1050 if (!isempty(s
->hostname_field
))
1051 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
1055 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
1056 /* Split up strictly by any UID */
1057 journal_uid
= realuid
;
1058 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
1059 /* Split up by login UIDs. We do this only if the
1060 * realuid is not root, in order not to accidentally
1061 * leak privileged information to the user that is
1062 * logged by a privileged process that is part of an
1063 * unprivileged session. */
1064 journal_uid
= owner
;
1068 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
/* Logs a message that originates from journald itself.
 *
 * The entry starts with the fixed fields SYSLOG_FACILITY=3 (LOG_DAEMON, see
 * the assert_cc), SYSLOG_IDENTIFIER=systemd-journald, _TRANSPORT=driver and
 * PRIORITY=6 (LOG_INFO), followed by 'message_id'. The printf-style 'format'
 * and its variadic arguments are expanded into further fields by
 * log_format_iovec(). The credentials of the journald process (getpid(),
 * getuid(), getgid()) are attached and the entry is passed to
 * dispatch_message_real(); the iov_base buffers are freed afterwards.
 *
 * If formatting fails, a replacement entry with PRIORITY=4 and the text
 * "Entry printing failed: <error>" is dispatched instead.
 *
 * NOTE(review): 'message_id' is presumably added only when non-NULL, and the
 * free() presumably covers only the entries log_format_iovec() allocated -
 * confirm. */
1071 void server_driver_message(Server
*s
, const char *message_id
, const char *format
, ...) {
1072 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
1076 struct ucred ucred
= {};
1081 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1082 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
1083 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
1085 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
1086 assert_cc(6 == LOG_INFO
);
1087 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
1090 IOVEC_SET_STRING(iovec
[n
++], message_id
);
1093 va_start(ap
, format
);
1094 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
1095 /* Error handling below */
1098 ucred
.pid
= getpid();
1099 ucred
.uid
= getuid();
1100 ucred
.gid
= getgid();
1103 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0, NULL
);
1106 free(iovec
[m
++].iov_base
);
1109 /* We failed to format the message. Emit a warning instead. */
1112 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1115 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec
[n
++], buf
);
1117 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0, NULL
);
/* Entry point for messages received from clients: filters and rate-limits
 * the entry, then hands it to dispatch_message_real().
 *
 * Checks performed, as far as visible:
 *   - the priority is compared with s->max_level_store
 *   - the storage mode is compared with STORAGE_NONE
 *   - the sender's cgroup path is looked up and cut after its third '/', so
 *     that rate limiting is applied per service / per user session
 *   - journal_rate_limit_test() is consulted with the currently available
 *     space; a driver message (SD_MESSAGE_JOURNAL_DROPPED_STR) reports how
 *     many messages were suppressed
 *
 * NOTE(review): presumably the first two checks drop the message, a zero
 * result from the rate limiter drops it as well, and the cut-off character is
 * restored before the final dispatch - confirm. */
1121 void server_dispatch_message(
1123 struct iovec
*iovec
, unsigned n
, unsigned m
,
1124 const struct ucred
*ucred
,
1125 const struct timeval
*tv
,
1126 const char *label
, size_t label_len
,
1127 const char *unit_id
,
1132 _cleanup_free_
char *path
= NULL
;
1133 uint64_t available
= 0;
1137 assert(iovec
|| n
== 0);
1142 if (LOG_PRI(priority
) > s
->max_level_store
)
1145 /* Stop early in case the information will not be stored
1147 if (s
->storage
== STORAGE_NONE
)
1153 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1160 * So let's cut off everything past the third /, since that is
1161 * where user directories start */
1163 c
= strchr(path
, '/');
1165 c
= strchr(c
+1, '/');
1167 c
= strchr(c
+1, '/');
1173 (void) determine_space(s
, &available
, NULL
);
1174 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
1178 /* Write a suppression message if we suppressed something */
1180 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
1185 /* restore cgroup path for logging */
1188 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
, path
);
/* Copies the entries of the runtime journal into s->system_journal.
 *
 * Preconditions checked in the visible code: s->storage is STORAGE_AUTO or
 * STORAGE_PERSISTENT, a runtime journal is open, and - when
 * require_flag_file is set - flushed_flag_is_set() reports true. The system
 * journal is then opened with system_journal_open(s, true).
 *
 * The runtime journal is read through sd_journal_open(SD_JOURNAL_RUNTIME_ONLY)
 * with the data threshold set to 0, and every entry is copied with
 * journal_file_copy_entry(). If a copy fails and shall_try_append_again()
 * permits it, server_vacuum() is called and the copy is retried once.
 *
 * Afterwards the system journal is post-processed, the runtime journal is
 * closed, /run/log/journal is removed, and a driver message reports how long
 * the flush took.
 *
 * NOTE(review): the error checks, jumps and return statements are not visible
 * in this excerpt, so the exact return values cannot be stated from here. */
1191 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
1193 sd_journal
*j
= NULL
;
1194 char ts
[FORMAT_TIMESPAN_MAX
];
1201 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
1204 if (!s
->runtime_journal
)
1207 if (require_flag_file
&& !flushed_flag_is_set())
1210 (void) system_journal_open(s
, true);
1212 if (!s
->system_journal
)
1215 log_debug("Flushing to /var...");
1217 start
= now(CLOCK_MONOTONIC
);
1219 r
= sd_id128_get_machine(&machine
);
1223 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1225 return log_error_errno(r
, "Failed to read runtime journal: %m");
/* A threshold of 0 is passed so that field data is not truncated while reading
 * (NOTE(review): per the sd_journal_set_data_threshold() contract - confirm). */
1227 sd_journal_set_data_threshold(j
, 0);
1229 SD_JOURNAL_FOREACH(j
) {
1233 f
= j
->current_file
;
1234 assert(f
&& f
->current_offset
> 0);
1238 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1240 log_error_errno(r
, "Can't read entry: %m");
1244 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1248 if (!shall_try_append_again(s
->system_journal
, r
)) {
1249 log_error_errno(r
, "Can't write entry: %m");
1254 server_vacuum(s
, false);
1256 if (!s
->system_journal
) {
1257 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1262 log_debug("Retrying write.");
1263 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1265 log_error_errno(r
, "Can't write entry: %m");
1273 journal_file_post_change(s
->system_journal
);
1275 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1278 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1280 sd_journal_close(j
);
1282 server_driver_message(s
, NULL
,
1283 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1284 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* Event-loop I/O callback for the native, syslog and audit datagram sockets.
 *
 * userdata is the Server; fd must be one of s->native_fd, s->syslog_fd or
 * s->audit_fd. Any revents value other than EPOLLIN is logged as an error.
 *
 * The receive buffer s->buffer is grown to fit the pending datagram (as
 * reported by SIOCINQ, with a lower bound derived from
 * MAX_AUDIT_MESSAGE_LENGTH), one datagram is read with a non-blocking
 * recvmsg(), and the control messages are scanned for:
 *   SCM_CREDENTIALS -> ucred
 *   SCM_SECURITY    -> label / label_len
 *   SO_TIMESTAMP    -> tv
 *   SCM_RIGHTS      -> fds / n_fds
 *
 * The payload is then routed by socket:
 *   syslog: server_process_syslog_message() when data arrived without fds;
 *   native: server_process_native_message() for data without fds, or
 *           server_process_native_file() for exactly one fd and no data;
 *   audit:  server_process_audit_message() when data arrived without fds.
 * Other combinations only produce a warning. Finally all received file
 * descriptors are closed with close_many().
 *
 * NOTE(review): several declarations, the `else` keywords and the return
 * statements are not visible in this excerpt. */
1291 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1292 Server
*s
= userdata
;
1293 struct ucred
*ucred
= NULL
;
1294 struct timeval
*tv
= NULL
;
1295 struct cmsghdr
*cmsg
;
1297 size_t label_len
= 0, m
;
1300 int *fds
= NULL
, v
= 0;
1304 struct cmsghdr cmsghdr
;
1306 /* We use NAME_MAX space for the SELinux label
1307 * here. The kernel currently enforces no
1308 * limit, but according to suggestions from
1309 * the SELinux people this will change and it
1310 * will probably be identical to NAME_MAX. For
1311 * now we use that, but this should be updated
1312 * one day when the final limit is known. */
1313 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1314 CMSG_SPACE(sizeof(struct timeval
)) +
1315 CMSG_SPACE(sizeof(int)) + /* fd */
1316 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1319 union sockaddr_union sa
= {};
1321 struct msghdr msghdr
= {
1324 .msg_control
= &control
,
1325 .msg_controllen
= sizeof(control
),
1327 .msg_namelen
= sizeof(sa
),
1331 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1333 if (revents
!= EPOLLIN
) {
1334 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1338 /* Try to get the right size, if we can. (Not all
1339 * sockets support SIOCINQ, hence we just try, but
1340 * don't rely on it.) */
1341 (void) ioctl(fd
, SIOCINQ
, &v
);
1343 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1344 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1346 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1348 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1351 iovec
.iov_base
= s
->buffer
;
1352 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1354 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1356 if (errno
== EINTR
|| errno
== EAGAIN
)
1359 return log_error_errno(errno
, "recvmsg() failed: %m");
/* Pick the ancillary data apart; entries with an unexpected length for
 * credentials or timestamps are not matched by the conditions below. */
1362 CMSG_FOREACH(cmsg
, &msghdr
) {
1364 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1365 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1366 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1367 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1368 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1369 cmsg
->cmsg_type
== SCM_SECURITY
) {
1370 label
= (char*) CMSG_DATA(cmsg
);
1371 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1372 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1373 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1374 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1375 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1376 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1377 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1378 fds
= (int*) CMSG_DATA(cmsg
);
1379 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1383 /* And a trailing NUL, just in case */
1386 if (fd
== s
->syslog_fd
) {
1387 if (n
> 0 && n_fds
== 0)
1388 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1390 log_warning("Got file descriptors via syslog socket. Ignoring.");
1392 } else if (fd
== s
->native_fd
) {
1393 if (n
> 0 && n_fds
== 0)
1394 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1395 else if (n
== 0 && n_fds
== 1)
1396 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1398 log_warning("Got too many file descriptors via native socket. Ignoring.");
1401 assert(fd
== s
->audit_fd
);
1403 if (n
> 0 && n_fds
== 0)
1404 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1406 log_warning("Got file descriptors via audit socket. Ignoring.");
1409 close_many(fds
, n_fds
);
/* SIGUSR1 handler (registered in setup_signals()): flush request.
 *
 * Logs the requesting PID, flushes the runtime journal with
 * server_flush_to_var(s, false), runs server_vacuum(s, false), touches
 * /run/systemd/journal/flushed (a failure is only warned about) and emits a
 * space usage message.
 *
 * NOTE(review): the return statement and the check around the warning are not
 * visible in this excerpt. */
1413 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1414 Server
*s
= userdata
;
1419 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1421 (void) server_flush_to_var(s
, false);
1423 server_vacuum(s
, false);
1425 r
= touch("/run/systemd/journal/flushed");
1427 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1429 server_space_usage_message(s
, NULL
);
/* SIGUSR2 handler (registered in setup_signals()): rotation request.
 *
 * Logs the requesting PID, runs server_vacuum(s, true), calls patch_min_use()
 * for each storage whose journal is currently open, and records the time of
 * the rotation in /run/systemd/journal/rotated (a failure is only warned
 * about).
 *
 * NOTE(review): the statement that performs the rotation itself and the
 * return statement are not visible in this excerpt. */
1433 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1434 Server
*s
= userdata
;
1439 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1441 server_vacuum(s
, true);
1443 if (s
->system_journal
)
1444 patch_min_use(&s
->system_storage
);
1445 if (s
->runtime_journal
)
1446 patch_min_use(&s
->runtime_storage
);
1448 /* Let clients know when the most recent rotation happened. */
1449 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1451 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Termination handler, registered in setup_signals() for both SIGTERM and
 * SIGINT: logs the received signal at LOG_INFO and asks the event loop to
 * exit with code 0.
 *
 * NOTE(review): the return statement is not visible in this excerpt. */
1456 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1457 Server
*s
= userdata
;
1461 log_received_signal(LOG_INFO
, si
);
1463 sd_event_exit(s
->event
, 0);
/* SIGRTMIN+1 handler (registered in setup_signals()): sync request.
 *
 * Logs the requesting PID at debug level and records the time of the sync in
 * /run/systemd/journal/synced; a failure to write that file is only warned
 * about.
 *
 * NOTE(review): the statement that performs the sync itself and the return
 * statement are not visible in this excerpt. */
1467 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1468 Server
*s
= userdata
;
1473 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1477 /* Let clients know when the most recent sync happened. */
1478 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1480 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs the signal mask and the signal event sources of the server.
 *
 * The mask is set for SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1, and
 * one event source per signal is registered on s->event:
 *   SIGUSR1    -> dispatch_sigusr1
 *   SIGUSR2    -> dispatch_sigusr2
 *   SIGTERM    -> dispatch_sigterm   (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGINT     -> dispatch_sigterm   (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGRTMIN+1 -> dispatch_sigrtmin1 (priority SD_EVENT_PRIORITY_NORMAL+15)
 *
 * NOTE(review): the error checks after each call and the return statements
 * are not visible in this excerpt. */
1485 static int setup_signals(Server
*s
) {
1490 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1492 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1496 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1500 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1504 /* Let's process SIGTERM late, so that we flush all queued
1505 * messages to disk before we exit */
1506 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1510 /* When journald is invoked on the terminal (when debugging),
1511 * it's useful if C-c is handled equivalent to SIGTERM. */
1512 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1516 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1520 /* SIGRTMIN+1 causes an immediate sync. We process this very
1521 * late, so that everything else queued at this point is
1522 * really written to disk. Clients can watch
1523 * /run/systemd/journal/synced with inotify until its mtime
1524 * changes to see when a sync happened. */
1525 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1529 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Callback for proc_cmdline_parse() (see server_init()): applies one kernel
 * command line option to the Server.
 *
 * Recognized keys and the field each one sets:
 *   systemd.journald.forward_to_syslog   -> s->forward_to_syslog   (boolean)
 *   systemd.journald.forward_to_kmsg     -> s->forward_to_kmsg     (boolean)
 *   systemd.journald.forward_to_console  -> s->forward_to_console  (boolean)
 *   systemd.journald.forward_to_wall     -> s->forward_to_wall     (boolean)
 *   systemd.journald.max_level_console   -> s->max_level_console   (log level)
 *   systemd.journald.max_level_store     -> s->max_level_store     (log level)
 *   systemd.journald.max_level_syslog    -> s->max_level_syslog    (log level)
 *   systemd.journald.max_level_kmsg      -> s->max_level_kmsg      (log level)
 *   systemd.journald.max_level_wall      -> s->max_level_wall      (log level)
 *
 * A boolean key given without a value counts as true; a log-level key is
 * checked with proc_cmdline_value_missing() first. Any other key starting
 * with "systemd.journald" triggers an "Unknown ... option" warning.
 *
 * NOTE(review): the conditionals that choose between the warning and the
 * assignment, the derivation of s from data, and the return statements are
 * not visible in this excerpt. */
1536 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1542 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1544 r
= value
? parse_boolean(value
) : true;
1546 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1548 s
->forward_to_syslog
= r
;
1550 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1552 r
= value
? parse_boolean(value
) : true;
1554 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1556 s
->forward_to_kmsg
= r
;
1558 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1560 r
= value
? parse_boolean(value
) : true;
1562 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1564 s
->forward_to_console
= r
;
1566 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1568 r
= value
? parse_boolean(value
) : true;
1570 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1572 s
->forward_to_wall
= r
;
1574 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1576 if (proc_cmdline_value_missing(key
, value
))
1579 r
= log_level_from_string(value
);
1581 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1583 s
->max_level_console
= r
;
1585 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1587 if (proc_cmdline_value_missing(key
, value
))
1590 r
= log_level_from_string(value
);
1592 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1594 s
->max_level_store
= r
;
1596 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1598 if (proc_cmdline_value_missing(key
, value
))
1601 r
= log_level_from_string(value
);
1603 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1605 s
->max_level_syslog
= r
;
1607 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1609 if (proc_cmdline_value_missing(key
, value
))
1612 r
= log_level_from_string(value
);
1614 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1616 s
->max_level_kmsg
= r
;
1618 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1620 if (proc_cmdline_value_missing(key
, value
))
1623 r
= log_level_from_string(value
);
1625 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1627 s
->max_level_wall
= r
;
1629 } else if (startswith(key
, "systemd.journald"))
1630 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1632 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration: PKGSYSCONFDIR "/journald.conf" plus the
 * "systemd/journald.conf.d" drop-in directories, resolved through
 * journald_gperf_lookup. Returns whatever config_parse_many_nulstr() returns.
 *
 * NOTE(review): the remaining arguments of the call (section list, flags,
 * userdata) are not visible in this excerpt. */
1636 static int server_parse_config_file(Server
*s
) {
1639 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1640 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1642 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback installed by server_schedule_sync(); userdata is the Server.
 *
 * NOTE(review): the body beyond the userdata cast is not visible in this
 * excerpt; presumably it performs the deferred sync - confirm against the
 * full source. */
1646 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1647 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk.
 *
 * Entries of priority LOG_CRIT or more severe (numerically <= LOG_CRIT) are
 * handled immediately, as the inline comment states. Otherwise, unless a sync
 * is already scheduled, a one-shot CLOCK_MONOTONIC timer is armed
 * s->sync_interval_usec after the event loop's current time: the timer source
 * is created on first use with server_dispatch_sync() as callback and
 * SD_EVENT_PRIORITY_IMPORTANT, and merely re-armed on later calls. Finally
 * s->sync_scheduled is set.
 *
 * NOTE(review): the early returns, the `else` branch structure and the error
 * checks are not visible in this excerpt. */
1655 int server_schedule_sync(Server
*s
, int priority
) {
1660 if (priority
<= LOG_CRIT
) {
1661 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1666 if (s
->sync_scheduled
)
1669 if (s
->sync_interval_usec
> 0) {
1672 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1676 when
+= s
->sync_interval_usec
;
1678 if (!s
->sync_event_source
) {
1679 r
= sd_event_add_time(
1681 &s
->sync_event_source
,
1684 server_dispatch_sync
, s
);
1688 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1690 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1694 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1699 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the hostname fd:
 * refreshes the cached hostname via server_cache_hostname().
 *
 * NOTE(review): the return statement is not visible in this excerpt. */
1705 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1706 Server
*s
= userdata
;
1710 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and registers it with the event loop so
 * that dispatch_hostname_change() runs when it signals an event.
 *
 * The fd is stored in s->hostname_fd and the event source in
 * s->hostname_event_source, whose priority is set to
 * SD_EVENT_PRIORITY_IMPORTANT-10. Failing to open the file is an error. A
 * registration failure has two visible outcomes: a warning followed by closing
 * the fd ("ignoring"), or an error return.
 *
 * NOTE(review): the condition that separates the tolerated registration
 * failure from the fatal one is not visible in this excerpt; the truncated
 * comment suggests it concerns kernels older than 3.2 - confirm against the
 * full source. */
1714 static int server_open_hostname(Server
*s
) {
1719 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1720 if (s
->hostname_fd
< 0)
1721 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1723 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1725 /* kernels prior to 3.2 don't support polling this file. Ignore
1728 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1729 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1733 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1736 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1738 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
// I/O callback for the notification socket (registered for EPOLLOUT in
// server_connect_notify()). Each invocation sends at most one message, chosen
// in this order:
//   1. the initial READY=1 / STATUS message, if s->sent_notify_ready is unset;
//   2. a watchdog message, if s->send_watchdog is set;
//   3. one queued stdout stream notification.
// An EAGAIN from send() is checked for separately from other errors, which
// are logged and returned. When neither a watchdog message nor a stream
// notification remains pending, the event source is switched to SD_EVENT_OFF.
//
// NOTE(review): parts of the message literals, the error checks around send()
// and the return statements are not visible in this excerpt.
1743 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1744 Server
*s
= userdata
;
1748 assert(s
->notify_event_source
== es
);
1749 assert(s
->notify_fd
== fd
);
1751 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1752 * message on it. Either it's the watchdog event, the initial
1753 * READY=1 event or an stdout stream event. If there's nothing
1754 * to write anymore, turn our event source off. The next time
1755 * there's something to send it will be turned on again. */
1757 if (!s
->sent_notify_ready
) {
1758 static const char p
[] =
1760 "STATUS=Processing requests...";
1763 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1765 if (errno
== EAGAIN
)
1768 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1771 s
->sent_notify_ready
= true;
1772 log_debug("Sent READY=1 notification.");
1774 } else if (s
->send_watchdog
) {
1776 static const char p
[] =
1781 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1783 if (errno
== EAGAIN
)
1786 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1789 s
->send_watchdog
= false;
1790 log_debug("Sent WATCHDOG=1 notification.");
1792 } else if (s
->stdout_streams_notify_queue
)
1793 /* Dispatch one stream notification event */
1794 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1796 /* Leave us enabled if there's still more to do. */
1797 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1800 /* There was nothing to do anymore, let's turn ourselves off. */
1801 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1803 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback for the watchdog event source (added in
 * server_connect_notify()).
 *
 * Marks a watchdog message as pending (s->send_watchdog) and enables the
 * notify event source so dispatch_notify_event() will send it, then re-arms
 * this timer for usec + s->watchdog_usec / 2 and re-enables it.
 *
 * Failing to enable the notify source is only warned about; failures to
 * re-arm or re-enable the timer are returned as errors.
 *
 * NOTE(review): the error checks and the final return are not visible in this
 * excerpt. */
1808 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1809 Server
*s
= userdata
;
1814 s
->send_watchdog
= true;
1816 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1818 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1820 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1822 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1824 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1826 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking datagram socket to $NOTIFY_SOCKET and hooks it into
 * the event loop, so that notifications are only sent when the socket is
 * writable (see the rationale text inside the function about avoiding a
 * blocking cycle with PID 1 and dbus-daemon).
 *
 * Visible steps:
 *  - read NOTIFY_SOCKET; its value must start with '@' or '/' and be longer
 *    than one character, and must not exceed sizeof(sa.un.sun_path);
 *  - create an AF_UNIX SOCK_DGRAM socket with SOCK_CLOEXEC|SOCK_NONBLOCK and
 *    enlarge its send buffer to NOTIFY_SNDBUF_SIZE;
 *  - copy the path into sa.un.sun_path, replacing a leading '@' with a NUL
 *    byte, and connect();
 *  - register the fd for EPOLLOUT with dispatch_notify_event();
 *  - if sd_watchdog_enabled() reports an active watchdog, set
 *    s->send_watchdog and add a timer that first fires after half the
 *    watchdog interval, handled by dispatch_watchdog().
 *
 * NOTE(review): the handling of an unset NOTIFY_SOCKET, the error checks and
 * the return statements are not visible in this excerpt. */
1831 static int server_connect_notify(Server
*s
) {
1832 union sockaddr_union sa
= {
1833 .un
.sun_family
= AF_UNIX
,
1839 assert(s
->notify_fd
< 0);
1840 assert(!s
->notify_event_source
);
1843 So here's the problem: we'd like to send notification
1844 messages to PID 1, but we cannot do that via sd_notify(),
1845 since that's synchronous, and we might end up blocking on
1846 it. Specifically: given that PID 1 might block on
1847 dbus-daemon during IPC, and dbus-daemon is logging to us,
1848 and might hence block on us, we might end up in a deadlock
1849 if we block on sending PID 1 notification messages — by
1850 generating a full blocking circle. To avoid this, let's
1851 create a non-blocking socket, and connect it to the
1852 notification socket, and then wait for POLLOUT before we
1853 send anything. This should efficiently avoid any deadlocks,
1854 as we'll never block on PID 1, hence PID 1 can safely block
1855 on dbus-daemon which can safely block on us again.
1857 Don't think that this issue is real? It is, see:
1858 https://github.com/systemd/systemd/issues/1505
1861 e
= getenv("NOTIFY_SOCKET");
1865 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1866 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1870 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1871 log_error("NOTIFY_SOCKET path too long: %s", e
);
1875 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1876 if (s
->notify_fd
< 0)
1877 return log_error_errno(errno
, "Failed to create notify socket: %m");
1879 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1881 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1882 if (sa
.un
.sun_path
[0] == '@')
1883 sa
.un
.sun_path
[0] = 0;
1885 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1887 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1889 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1891 return log_error_errno(r
, "Failed to watch notification socket: %m");
1893 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1894 s
->send_watchdog
= true;
1896 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1898 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1901 /* This should fire pretty soon, which we'll use to send the
// Initializes a Server and brings up everything it needs to run.
//
// Visible steps, in order:
//  1. Set all socket fds to -1 and apply the built-in defaults (sync
//     interval, rate limit, forwarding, max file age, max log levels,
//     journal metrics).
//  2. Load the configuration file and the kernel command line; a command
//     line parse failure is only warned about. If exactly one of rate limit
//     interval and burst is zero, both are reset to 0.
//  3. Create /run/systemd/journal, the user journal map, the mmap cache, the
//     deferred-close set and the default event loop.
//  4. Classify the file descriptors received via sd_listen_fds(): native
//     socket, stdout stream socket, /dev/log (or dev-log) socket, audit
//     netlink socket. A second fd of the same kind is an error. Other fds go
//     into an FDSet that is offered to server_restore_streams(); any left
//     over are reported and closed.
//  5. Open the stdout, syslog and native sockets and /dev/kmsg; open the
//     audit socket only if one was passed in or no sockets were passed at
//     all.
//  6. Open the kernel sequence number file and the hostname watch, install
//     signal handling, create the udev context and the rate limiter, look up
//     the cgroup root, and cache hostname, boot ID and machine ID.
//  7. Name the runtime and system storage and derive their paths under
//     /run/log/journal/ and /var/log/journal/ from the machine ID.
//  8. Connect to the notification socket (best effort) and return the result
//     of system_journal_open(s, false).
//
// NOTE(review): the error checks after most calls, the assignments of the
// matched fds, and several default assignments are not visible in this
// excerpt.
1907 int server_init(Server
*s
) {
1908 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1915 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1919 s
->watchdog_usec
= USEC_INFINITY
;
1921 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1922 s
->sync_scheduled
= false;
1924 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1925 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1927 s
->forward_to_wall
= true;
1929 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1931 s
->max_level_store
= LOG_DEBUG
;
1932 s
->max_level_syslog
= LOG_DEBUG
;
1933 s
->max_level_kmsg
= LOG_NOTICE
;
1934 s
->max_level_console
= LOG_INFO
;
1935 s
->max_level_wall
= LOG_EMERG
;
1937 journal_reset_metrics(&s
->system_storage
.metrics
);
1938 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1940 server_parse_config_file(s
);
1942 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1944 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1946 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1947 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1948 s
->rate_limit_interval
, s
->rate_limit_burst
);
1949 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1952 (void) mkdir_p("/run/systemd/journal", 0755);
1954 s
->user_journals
= ordered_hashmap_new(NULL
);
1955 if (!s
->user_journals
)
1958 s
->mmap
= mmap_cache_new();
1962 s
->deferred_closes
= set_new(NULL
);
1963 if (!s
->deferred_closes
)
1966 r
= sd_event_default(&s
->event
);
1968 return log_error_errno(r
, "Failed to create event loop: %m");
1970 n
= sd_listen_fds(true);
1972 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1974 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1976 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1978 if (s
->native_fd
>= 0) {
1979 log_error("Too many native sockets passed.");
1985 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1987 if (s
->stdout_fd
>= 0) {
1988 log_error("Too many stdout sockets passed.");
1994 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1995 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1997 if (s
->syslog_fd
>= 0) {
1998 log_error("Too many /dev/log sockets passed.");
2004 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
2006 if (s
->audit_fd
>= 0) {
2007 log_error("Too many audit sockets passed.");
2021 r
= fdset_put(fds
, fd
);
2027 /* Try to restore streams, but don't bother if this fails */
2028 (void) server_restore_streams(s
, fds
);
2030 if (fdset_size(fds
) > 0) {
2031 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
2032 fds
= fdset_free(fds
);
2035 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
2037 /* always open stdout, syslog, native, and kmsg sockets */
2039 /* systemd-journald.socket: /run/systemd/journal/stdout */
2040 r
= server_open_stdout_socket(s
);
2044 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2045 r
= server_open_syslog_socket(s
);
2049 /* systemd-journald.socket: /run/systemd/journal/socket */
2050 r
= server_open_native_socket(s
);
2055 r
= server_open_dev_kmsg(s
);
2059 /* Unless we got *some* sockets and not audit, open audit socket */
2060 if (s
->audit_fd
>= 0 || no_sockets
) {
2061 r
= server_open_audit(s
);
2066 r
= server_open_kernel_seqnum(s
);
2070 r
= server_open_hostname(s
);
2074 r
= setup_signals(s
);
2078 s
->udev
= udev_new();
2082 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
2086 r
= cg_get_root_path(&s
->cgroup_root
);
2090 server_cache_hostname(s
);
2091 server_cache_boot_id(s
);
2092 server_cache_machine_id(s
);
2094 s
->runtime_storage
.name
= "Runtime journal";
2095 s
->system_storage
.name
= "System journal";
2097 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
2098 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
2099 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2102 (void) server_connect_notify(s
);
2104 return system_journal_open(s
, false);
/* Gives every open journal file the chance to append an authentication tag.
 *
 * Takes one CLOCK_REALTIME timestamp and passes it to
 * journal_file_maybe_append_tag() for the system journal (if open) and for
 * each file in s->user_journals.
 *
 * NOTE(review): local declarations and any surrounding preprocessor guards
 * are not visible in this excerpt. */
2107 void server_maybe_append_tags(Server
*s
) {
2113 n
= now(CLOCK_REALTIME
);
2115 if (s
->system_journal
)
2116 journal_file_maybe_append_tag(s
->system_journal
, n
);
2118 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2119 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server owns.
 *
 * Visible teardown order:
 *  - journal files queued in s->deferred_closes, then the set itself;
 *  - all stdout streams;
 *  - the system journal, the runtime journal and every user journal, then
 *    the user journal map;
 *  - every event source, then the event loop reference;
 *  - every socket / file descriptor (via safe_close());
 *  - the rate limiter, the kernel sequence number mapping, the cgroup root
 *    and hostname strings, the mmap cache and the udev context.
 *
 * NOTE(review): a few release calls of the original function are not visible
 * in this excerpt, so this list may be incomplete. */
2123 void server_done(Server
*s
) {
2127 if (s
->deferred_closes
) {
2128 journal_file_close_set(s
->deferred_closes
);
2129 set_free(s
->deferred_closes
);
2132 while (s
->stdout_streams
)
2133 stdout_stream_free(s
->stdout_streams
);
2135 if (s
->system_journal
)
2136 (void) journal_file_close(s
->system_journal
);
2138 if (s
->runtime_journal
)
2139 (void) journal_file_close(s
->runtime_journal
);
2141 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
2142 (void) journal_file_close(f
);
2144 ordered_hashmap_free(s
->user_journals
);
2146 sd_event_source_unref(s
->syslog_event_source
);
2147 sd_event_source_unref(s
->native_event_source
);
2148 sd_event_source_unref(s
->stdout_event_source
);
2149 sd_event_source_unref(s
->dev_kmsg_event_source
);
2150 sd_event_source_unref(s
->audit_event_source
);
2151 sd_event_source_unref(s
->sync_event_source
);
2152 sd_event_source_unref(s
->sigusr1_event_source
);
2153 sd_event_source_unref(s
->sigusr2_event_source
);
2154 sd_event_source_unref(s
->sigterm_event_source
);
2155 sd_event_source_unref(s
->sigint_event_source
);
2156 sd_event_source_unref(s
->sigrtmin1_event_source
);
2157 sd_event_source_unref(s
->hostname_event_source
);
2158 sd_event_source_unref(s
->notify_event_source
);
2159 sd_event_source_unref(s
->watchdog_event_source
);
2160 sd_event_unref(s
->event
);
2162 safe_close(s
->syslog_fd
);
2163 safe_close(s
->native_fd
);
2164 safe_close(s
->stdout_fd
);
2165 safe_close(s
->dev_kmsg_fd
);
2166 safe_close(s
->audit_fd
);
2167 safe_close(s
->hostname_fd
);
2168 safe_close(s
->notify_fd
);
2171 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with the size of a single uint64_t. */
2173 if (s
->kernel_seqnum
)
2174 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2178 free(s
->cgroup_root
);
2179 free(s
->hostname_field
);
2182 mmap_cache_unref(s
->mmap
);
2184 udev_unref(s
->udev
);
/* String names of the Storage enum values ("auto", "volatile", "persistent",
 * "none"). The two macro invocations below generate the string<->enum lookup
 * helpers and the config parser config_parse_storage from this table. */
2187 static const char* const storage_table
[_STORAGE_MAX
] = {
2188 [STORAGE_AUTO
] = "auto",
2189 [STORAGE_VOLATILE
] = "volatile",
2190 [STORAGE_PERSISTENT
] = "persistent",
2191 [STORAGE_NONE
] = "none"
2194 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2195 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values ("login", "uid", "none"). The two
 * macro invocations below generate the string<->enum lookup helpers and the
 * config parser config_parse_split_mode from this table. */
2197 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2198 [SPLIT_LOGIN
] = "login",
2199 [SPLIT_UID
] = "uid",
2200 [SPLIT_NONE
] = "none",
2203 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2204 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");