2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "format-util.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
76 #define USER_JOURNALS_MAX 1024
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
91 _cleanup_closedir_
DIR *d
= NULL
;
100 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
101 errno
, "Failed to open %s: %m", path
);
103 if (fstatvfs(dirfd(d
), &ss
) < 0)
104 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
106 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
108 FOREACH_DIRENT_ALL(de
, d
, break) {
111 if (!endswith(de
->d_name
, ".journal") &&
112 !endswith(de
->d_name
, ".journal~"))
115 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
116 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
120 if (!S_ISREG(st
.st_mode
))
123 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
129 static void cache_space_invalidate(JournalStorageSpace
*space
) {
130 memset(space
, 0, sizeof(*space
));
133 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
134 JournalStorageSpace
*space
;
135 JournalMetrics
*metrics
;
136 uint64_t vfs_used
, vfs_avail
, avail
;
142 metrics
= &storage
->metrics
;
143 space
= &storage
->space
;
145 ts
= now(CLOCK_MONOTONIC
);
147 if (space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
150 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
154 space
->vfs_used
= vfs_used
;
155 space
->vfs_available
= vfs_avail
;
157 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
159 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
160 space
->available
= LESS_BY(space
->limit
, vfs_used
);
161 space
->timestamp
= ts
;
165 static void patch_min_use(JournalStorage
*storage
) {
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
174 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
178 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
184 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
186 r
= cache_space_refresh(s
, js
);
189 *available
= js
->space
.available
;
191 *limit
= js
->space
.limit
;
196 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
197 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
198 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
199 JournalMetrics
*metrics
;
204 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
206 if (cache_space_refresh(s
, storage
) < 0)
209 metrics
= &storage
->metrics
;
210 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
211 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
212 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
213 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
214 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
215 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
217 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
220 "JOURNAL_NAME=%s", storage
->name
,
221 "JOURNAL_PATH=%s", storage
->path
,
222 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
223 "CURRENT_USE_PRETTY=%s", fb1
,
224 "MAX_USE=%"PRIu64
, metrics
->max_use
,
225 "MAX_USE_PRETTY=%s", fb2
,
226 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
228 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
229 "DISK_AVAILABLE_PRETTY=%s", fb4
,
230 "LIMIT=%"PRIu64
, storage
->space
.limit
,
231 "LIMIT_PRETTY=%s", fb5
,
232 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
233 "AVAILABLE_PRETTY=%s", fb6
,
237 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
244 if (uid
<= SYSTEM_UID_MAX
)
247 r
= add_acls_for_user(f
->fd
, uid
);
249 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
253 static int open_journal(
259 JournalMetrics
*metrics
,
269 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
271 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
275 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
277 (void) journal_file_close(f
);
285 static bool flushed_flag_is_set(void) {
286 return (access("/run/systemd/journal/flushed", F_OK
) >= 0);
289 static int system_journal_open(Server
*s
, bool flush_requested
) {
290 bool flushed
= false;
294 if (!s
->system_journal
&&
295 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
296 (flush_requested
|| (flushed
= flushed_flag_is_set()))) {
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
304 if (s
->storage
== STORAGE_PERSISTENT
)
305 (void) mkdir_p("/var/log/journal/", 0755);
307 (void) mkdir(s
->system_storage
.path
, 0755);
309 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
310 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
312 server_add_acls(s
->system_journal
, 0);
313 (void) cache_space_refresh(s
, &s
->system_storage
);
314 patch_min_use(&s
->system_storage
);
316 if (r
!= -ENOENT
&& r
!= -EROFS
)
317 log_warning_errno(r
, "Failed to open system journal: %m");
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
329 if (s
->runtime_journal
&& flushed
)
330 (void) server_flush_to_var(s
);
333 if (!s
->runtime_journal
&&
334 (s
->storage
!= STORAGE_NONE
)) {
336 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
338 if (s
->system_journal
) {
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
344 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
347 log_warning_errno(r
, "Failed to open runtime journal: %m");
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn
, 0750);
361 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
363 return log_error_errno(r
, "Failed to open runtime journal: %m");
366 if (s
->runtime_journal
) {
367 server_add_acls(s
->runtime_journal
, 0);
368 (void) cache_space_refresh(s
, &s
->runtime_storage
);
369 patch_min_use(&s
->runtime_storage
);
376 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
377 _cleanup_free_
char *p
= NULL
;
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s
, false);
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
400 if (s
->runtime_journal
)
401 return s
->runtime_journal
;
403 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
404 return s
->system_journal
;
406 r
= sd_id128_get_machine(&machine
);
408 return s
->system_journal
;
410 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
414 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
415 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
416 return s
->system_journal
;
418 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
419 /* Too many open? Then let's close one */
420 f
= ordered_hashmap_steal_first(s
->user_journals
);
422 (void) journal_file_close(f
);
425 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
427 return s
->system_journal
;
429 server_add_acls(f
, uid
);
431 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
433 (void) journal_file_close(f
);
434 return s
->system_journal
;
440 static int do_rotate(
453 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
456 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
458 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
460 server_add_acls(*f
, uid
);
465 void server_rotate(Server
*s
) {
471 log_debug("Rotating...");
473 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
474 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
476 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
477 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
479 ordered_hashmap_replace(s
->user_journals
, k
, f
);
481 /* Old file has been closed and deallocated */
482 ordered_hashmap_remove(s
->user_journals
, k
);
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f
, s
->deferred_closes
, i
)
487 if (!journal_file_is_offlining(f
)) {
488 (void) set_remove(s
->deferred_closes
, f
);
489 (void) journal_file_close(f
);
493 void server_sync(Server
*s
) {
498 if (s
->system_journal
) {
499 r
= journal_file_set_offline(s
->system_journal
, false);
501 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
504 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
505 r
= journal_file_set_offline(f
, false);
507 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
510 if (s
->sync_event_source
) {
511 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
513 log_error_errno(r
, "Failed to disable sync timer source: %m");
516 s
->sync_scheduled
= false;
519 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
526 (void) cache_space_refresh(s
, storage
);
529 server_space_usage_message(s
, storage
);
531 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
532 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
533 &s
->oldest_file_usec
, verbose
);
534 if (r
< 0 && r
!= -ENOENT
)
535 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
537 cache_space_invalidate(&storage
->space
);
540 int server_vacuum(Server
*s
, bool verbose
) {
543 log_debug("Vacuuming...");
545 s
->oldest_file_usec
= 0;
547 if (s
->system_journal
)
548 do_vacuum(s
, &s
->system_storage
, verbose
);
549 if (s
->runtime_journal
)
550 do_vacuum(s
, &s
->runtime_storage
, verbose
);
555 static void server_cache_machine_id(Server
*s
) {
561 r
= sd_id128_get_machine(&id
);
565 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
568 static void server_cache_boot_id(Server
*s
) {
574 r
= sd_id128_get_boot(&id
);
578 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
581 static void server_cache_hostname(Server
*s
) {
582 _cleanup_free_
char *t
= NULL
;
587 t
= gethostname_malloc();
591 x
= strappend("_HOSTNAME=", t
);
595 free(s
->hostname_field
);
596 s
->hostname_field
= x
;
599 static bool shall_try_append_again(JournalFile
*f
, int r
) {
602 case -E2BIG
: /* Hit configured limit */
603 case -EFBIG
: /* Hit fs limit */
604 case -EDQUOT
: /* Quota limit hit */
605 case -ENOSPC
: /* Disk full */
606 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
609 case -EIO
: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f
->path
);
613 case -EHOSTDOWN
: /* Other machine */
614 log_info("%s: Journal file from other machine, rotating.", f
->path
);
617 case -EBUSY
: /* Unclean shutdown */
618 log_info("%s: Unclean shutdown, rotating.", f
->path
);
621 case -EPROTONOSUPPORT
: /* Unsupported feature */
622 log_info("%s: Unsupported feature, rotating.", f
->path
);
625 case -EBADMSG
: /* Corrupted */
626 case -ENODATA
: /* Truncated */
627 case -ESHUTDOWN
: /* Already archived */
628 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
631 case -EIDRM
: /* Journal file has been deleted */
632 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
635 case -ETXTBSY
: /* Journal file is from the future */
636 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
644 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
645 bool vacuumed
= false, rotate
= false;
646 struct dual_timestamp ts
;
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
658 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
660 if (ts
.realtime
< s
->last_realtime_clock
) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
666 log_debug("Time jumped backwards, rotating.");
670 f
= find_journal(s
, uid
);
674 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
682 server_vacuum(s
, false);
685 f
= find_journal(s
, uid
);
690 s
->last_realtime_clock
= ts
.realtime
;
692 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
694 server_schedule_sync(s
, priority
);
698 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
699 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
704 server_vacuum(s
, false);
706 f
= find_journal(s
, uid
);
710 log_debug("Retrying write.");
711 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
713 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
715 server_schedule_sync(s
, priority
);
718 static int get_invocation_id(const char *cgroup_root
, const char *slice
, const char *unit
, char **ret
) {
719 _cleanup_free_
char *escaped
= NULL
, *slice_path
= NULL
, *p
= NULL
;
720 char *copy
, ids
[SD_ID128_STRING_MAX
];
723 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
724 * on the cgroup path. */
726 r
= cg_slice_to_path(slice
, &slice_path
);
730 escaped
= cg_escape(unit
);
734 p
= strjoin(cgroup_root
, "/", slice_path
, "/", escaped
);
738 r
= cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER
, p
, "trusted.invocation_id", ids
, 32);
745 if (!id128_is_valid(ids
))
756 static void dispatch_message_real(
758 struct iovec
*iovec
, unsigned n
, unsigned m
,
759 const struct ucred
*ucred
,
760 const struct timeval
*tv
,
761 const char *label
, size_t label_len
,
766 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
767 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
768 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
769 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
770 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
771 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
772 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
773 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
779 uid_t realuid
= 0, owner
= 0, journal_uid
;
780 bool owner_valid
= false;
782 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
784 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
794 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
> 0 ? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
797 realuid
= ucred
->uid
;
799 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
800 IOVEC_SET_STRING(iovec
[n
++], pid
);
802 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
803 IOVEC_SET_STRING(iovec
[n
++], uid
);
805 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
806 IOVEC_SET_STRING(iovec
[n
++], gid
);
808 r
= get_process_comm(ucred
->pid
, &t
);
810 x
= strjoina("_COMM=", t
);
812 IOVEC_SET_STRING(iovec
[n
++], x
);
815 r
= get_process_exe(ucred
->pid
, &t
);
817 x
= strjoina("_EXE=", t
);
819 IOVEC_SET_STRING(iovec
[n
++], x
);
822 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
824 x
= strjoina("_CMDLINE=", t
);
826 IOVEC_SET_STRING(iovec
[n
++], x
);
829 r
= get_process_capeff(ucred
->pid
, &t
);
831 x
= strjoina("_CAP_EFFECTIVE=", t
);
833 IOVEC_SET_STRING(iovec
[n
++], x
);
837 r
= audit_session_from_pid(ucred
->pid
, &audit
);
839 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
840 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
843 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
845 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
846 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
850 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
852 _cleanup_free_
char *raw_unit
= NULL
, *raw_slice
= NULL
;
853 char *session
= NULL
;
855 x
= strjoina("_SYSTEMD_CGROUP=", c
);
856 IOVEC_SET_STRING(iovec
[n
++], x
);
858 r
= cg_path_get_session(c
, &t
);
860 session
= strjoina("_SYSTEMD_SESSION=", t
);
862 IOVEC_SET_STRING(iovec
[n
++], session
);
865 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
868 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
869 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
872 if (cg_path_get_unit(c
, &raw_unit
) >= 0) {
873 x
= strjoina("_SYSTEMD_UNIT=", raw_unit
);
874 IOVEC_SET_STRING(iovec
[n
++], x
);
875 } else if (unit_id
&& !session
) {
876 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
877 IOVEC_SET_STRING(iovec
[n
++], x
);
880 if (cg_path_get_user_unit(c
, &t
) >= 0) {
881 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
883 IOVEC_SET_STRING(iovec
[n
++], x
);
884 } else if (unit_id
&& session
) {
885 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
886 IOVEC_SET_STRING(iovec
[n
++], x
);
889 if (cg_path_get_slice(c
, &raw_slice
) >= 0) {
890 x
= strjoina("_SYSTEMD_SLICE=", raw_slice
);
891 IOVEC_SET_STRING(iovec
[n
++], x
);
894 if (cg_path_get_user_slice(c
, &t
) >= 0) {
895 x
= strjoina("_SYSTEMD_USER_SLICE=", t
);
897 IOVEC_SET_STRING(iovec
[n
++], x
);
900 if (raw_slice
&& raw_unit
) {
901 if (get_invocation_id(s
->cgroup_root
, raw_slice
, raw_unit
, &t
) >= 0) {
902 x
= strjoina("_SYSTEMD_INVOCATION_ID=", t
);
904 IOVEC_SET_STRING(iovec
[n
++], x
);
909 } else if (unit_id
) {
910 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
911 IOVEC_SET_STRING(iovec
[n
++], x
);
915 if (mac_selinux_have()) {
917 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
919 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
920 IOVEC_SET_STRING(iovec
[n
++], x
);
924 if (getpidcon(ucred
->pid
, &con
) >= 0) {
925 x
= strjoina("_SELINUX_CONTEXT=", con
);
928 IOVEC_SET_STRING(iovec
[n
++], x
);
937 r
= get_process_uid(object_pid
, &object_uid
);
939 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
940 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
943 r
= get_process_gid(object_pid
, &object_gid
);
945 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
946 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
949 r
= get_process_comm(object_pid
, &t
);
951 x
= strjoina("OBJECT_COMM=", t
);
953 IOVEC_SET_STRING(iovec
[n
++], x
);
956 r
= get_process_exe(object_pid
, &t
);
958 x
= strjoina("OBJECT_EXE=", t
);
960 IOVEC_SET_STRING(iovec
[n
++], x
);
963 r
= get_process_cmdline(object_pid
, 0, false, &t
);
965 x
= strjoina("OBJECT_CMDLINE=", t
);
967 IOVEC_SET_STRING(iovec
[n
++], x
);
971 r
= audit_session_from_pid(object_pid
, &audit
);
973 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
974 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
977 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
979 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
980 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
984 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
986 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
987 IOVEC_SET_STRING(iovec
[n
++], x
);
989 r
= cg_path_get_session(c
, &t
);
991 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
993 IOVEC_SET_STRING(iovec
[n
++], x
);
996 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
997 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
998 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
1001 if (cg_path_get_unit(c
, &t
) >= 0) {
1002 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
1004 IOVEC_SET_STRING(iovec
[n
++], x
);
1007 if (cg_path_get_user_unit(c
, &t
) >= 0) {
1008 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
1010 IOVEC_SET_STRING(iovec
[n
++], x
);
1013 if (cg_path_get_slice(c
, &t
) >= 0) {
1014 x
= strjoina("OBJECT_SYSTEMD_SLICE=", t
);
1016 IOVEC_SET_STRING(iovec
[n
++], x
);
1019 if (cg_path_get_user_slice(c
, &t
) >= 0) {
1020 x
= strjoina("OBJECT_SYSTEMD_USER_SLICE=", t
);
1022 IOVEC_SET_STRING(iovec
[n
++], x
);
1031 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
1032 IOVEC_SET_STRING(iovec
[n
++], source_time
);
1035 /* Note that strictly speaking storing the boot id here is
1036 * redundant since the entry includes this in-line
1037 * anyway. However, we need this indexed, too. */
1038 if (!isempty(s
->boot_id_field
))
1039 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
1041 if (!isempty(s
->machine_id_field
))
1042 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
1044 if (!isempty(s
->hostname_field
))
1045 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
1049 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
1050 /* Split up strictly by any UID */
1051 journal_uid
= realuid
;
1052 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
1053 /* Split up by login UIDs. We do this only if the
1054 * realuid is not root, in order not to accidentally
1055 * leak privileged information to the user that is
1056 * logged by a privileged process that is part of an
1057 * unprivileged session. */
1058 journal_uid
= owner
;
1062 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1065 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
1066 char mid
[11 + 32 + 1];
1067 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
1071 struct ucred ucred
= {};
1076 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1077 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
1078 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
1080 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
1081 assert_cc(6 == LOG_INFO
);
1082 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
1084 if (!sd_id128_is_null(message_id
)) {
1085 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
1086 IOVEC_SET_STRING(iovec
[n
++], mid
);
1091 va_start(ap
, format
);
1092 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
1093 /* Error handling below */
1096 ucred
.pid
= getpid();
1097 ucred
.uid
= getuid();
1098 ucred
.gid
= getgid();
1101 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
1104 free(iovec
[m
++].iov_base
);
1107 /* We failed to format the message. Emit a warning instead. */
1110 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1113 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
1114 IOVEC_SET_STRING(iovec
[n
++], buf
);
1115 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
1119 void server_dispatch_message(
1121 struct iovec
*iovec
, unsigned n
, unsigned m
,
1122 const struct ucred
*ucred
,
1123 const struct timeval
*tv
,
1124 const char *label
, size_t label_len
,
1125 const char *unit_id
,
1130 _cleanup_free_
char *path
= NULL
;
1131 uint64_t available
= 0;
1135 assert(iovec
|| n
== 0);
1140 if (LOG_PRI(priority
) > s
->max_level_store
)
1143 /* Stop early in case the information will not be stored
1145 if (s
->storage
== STORAGE_NONE
)
1151 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
1155 /* example: /user/lennart/3/foobar
1156 * /system/dbus.service/foobar
1158 * So let's cut off everything past the third /, since that is
1159 * where user directories start */
1161 c
= strchr(path
, '/');
1163 c
= strchr(c
+1, '/');
1165 c
= strchr(c
+1, '/');
1171 (void) determine_space(s
, &available
, NULL
);
1172 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
1176 /* Write a suppression message if we suppressed something */
1178 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
1179 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
1183 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
1186 int server_flush_to_var(Server
*s
) {
1188 sd_journal
*j
= NULL
;
1189 char ts
[FORMAT_TIMESPAN_MAX
];
1196 if (s
->storage
!= STORAGE_AUTO
&&
1197 s
->storage
!= STORAGE_PERSISTENT
)
1200 if (!s
->runtime_journal
)
1203 (void) system_journal_open(s
, true);
1205 if (!s
->system_journal
)
1208 log_debug("Flushing to /var...");
1210 start
= now(CLOCK_MONOTONIC
);
1212 r
= sd_id128_get_machine(&machine
);
1216 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1218 return log_error_errno(r
, "Failed to read runtime journal: %m");
1220 sd_journal_set_data_threshold(j
, 0);
1222 SD_JOURNAL_FOREACH(j
) {
1226 f
= j
->current_file
;
1227 assert(f
&& f
->current_offset
> 0);
1231 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1233 log_error_errno(r
, "Can't read entry: %m");
1237 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1241 if (!shall_try_append_again(s
->system_journal
, r
)) {
1242 log_error_errno(r
, "Can't write entry: %m");
1247 server_vacuum(s
, false);
1249 if (!s
->system_journal
) {
1250 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1255 log_debug("Retrying write.");
1256 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1258 log_error_errno(r
, "Can't write entry: %m");
1266 journal_file_post_change(s
->system_journal
);
1268 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1271 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1273 sd_journal_close(j
);
1275 server_driver_message(s
, SD_ID128_NULL
,
1276 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1277 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1284 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1285 Server
*s
= userdata
;
1286 struct ucred
*ucred
= NULL
;
1287 struct timeval
*tv
= NULL
;
1288 struct cmsghdr
*cmsg
;
1290 size_t label_len
= 0, m
;
1293 int *fds
= NULL
, v
= 0;
1297 struct cmsghdr cmsghdr
;
1299 /* We use NAME_MAX space for the SELinux label
1300 * here. The kernel currently enforces no
1301 * limit, but according to suggestions from
1302 * the SELinux people this will change and it
1303 * will probably be identical to NAME_MAX. For
1304 * now we use that, but this should be updated
1305 * one day when the final limit is known. */
1306 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1307 CMSG_SPACE(sizeof(struct timeval
)) +
1308 CMSG_SPACE(sizeof(int)) + /* fd */
1309 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1312 union sockaddr_union sa
= {};
1314 struct msghdr msghdr
= {
1317 .msg_control
= &control
,
1318 .msg_controllen
= sizeof(control
),
1320 .msg_namelen
= sizeof(sa
),
1324 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1326 if (revents
!= EPOLLIN
) {
1327 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1331 /* Try to get the right size, if we can. (Not all
1332 * sockets support SIOCINQ, hence we just try, but
1333 * don't rely on it.) */
1334 (void) ioctl(fd
, SIOCINQ
, &v
);
1336 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1337 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1339 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1341 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1344 iovec
.iov_base
= s
->buffer
;
1345 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1347 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1349 if (errno
== EINTR
|| errno
== EAGAIN
)
1352 return log_error_errno(errno
, "recvmsg() failed: %m");
1355 CMSG_FOREACH(cmsg
, &msghdr
) {
1357 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1358 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1359 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1360 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1361 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1362 cmsg
->cmsg_type
== SCM_SECURITY
) {
1363 label
= (char*) CMSG_DATA(cmsg
);
1364 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1365 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1366 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1367 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1368 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1369 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1370 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1371 fds
= (int*) CMSG_DATA(cmsg
);
1372 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1376 /* And a trailing NUL, just in case */
1379 if (fd
== s
->syslog_fd
) {
1380 if (n
> 0 && n_fds
== 0)
1381 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1383 log_warning("Got file descriptors via syslog socket. Ignoring.");
1385 } else if (fd
== s
->native_fd
) {
1386 if (n
> 0 && n_fds
== 0)
1387 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1388 else if (n
== 0 && n_fds
== 1)
1389 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1391 log_warning("Got too many file descriptors via native socket. Ignoring.");
1394 assert(fd
== s
->audit_fd
);
1396 if (n
> 0 && n_fds
== 0)
1397 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1399 log_warning("Got file descriptors via audit socket. Ignoring.");
1402 close_many(fds
, n_fds
);
1406 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1407 Server
*s
= userdata
;
1412 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1414 (void) server_flush_to_var(s
);
1416 server_vacuum(s
, false);
1418 r
= touch("/run/systemd/journal/flushed");
1420 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1422 server_space_usage_message(s
, NULL
);
1426 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1427 Server
*s
= userdata
;
1432 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1434 server_vacuum(s
, true);
1436 if (s
->system_journal
)
1437 patch_min_use(&s
->system_storage
);
1438 if (s
->runtime_journal
)
1439 patch_min_use(&s
->runtime_storage
);
1441 /* Let clients know when the most recent rotation happened. */
1442 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1444 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1449 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1450 Server
*s
= userdata
;
1454 log_received_signal(LOG_INFO
, si
);
1456 sd_event_exit(s
->event
, 0);
1460 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1461 Server
*s
= userdata
;
1466 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1470 /* Let clients know when the most recent sync happened. */
1471 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1473 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1478 static int setup_signals(Server
*s
) {
1483 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1485 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1489 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1493 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1497 /* Let's process SIGTERM late, so that we flush all queued
1498 * messages to disk before we exit */
1499 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1503 /* When journald is invoked on the terminal (when debugging),
1504 * it's useful if C-c is handled equivalently to SIGTERM. */
1505 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1509 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1513 /* SIGRTMIN+1 causes an immediate sync. We process this very
1514 * late, so that everything else queued at this point is
1515 * really written to disk. Clients can watch
1516 * /run/systemd/journal/synced with inotify until its mtime
1517 * changes to see when a sync happened. */
1518 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1522 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1529 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1535 if (streq(key
, "systemd.journald.forward_to_syslog")) {
1536 r
= value
? parse_boolean(value
) : true;
1538 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1540 s
->forward_to_syslog
= r
;
1541 } else if (streq(key
, "systemd.journald.forward_to_kmsg")) {
1542 r
= value
? parse_boolean(value
) : true;
1544 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1546 s
->forward_to_kmsg
= r
;
1547 } else if (streq(key
, "systemd.journald.forward_to_console")) {
1548 r
= value
? parse_boolean(value
) : true;
1550 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1552 s
->forward_to_console
= r
;
1553 } else if (streq(key
, "systemd.journald.forward_to_wall")) {
1554 r
= value
? parse_boolean(value
) : true;
1556 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1558 s
->forward_to_wall
= r
;
1559 } else if (streq(key
, "systemd.journald.max_level_console") && value
) {
1560 r
= log_level_from_string(value
);
1562 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1564 s
->max_level_console
= r
;
1565 } else if (streq(key
, "systemd.journald.max_level_store") && value
) {
1566 r
= log_level_from_string(value
);
1568 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1570 s
->max_level_store
= r
;
1571 } else if (streq(key
, "systemd.journald.max_level_syslog") && value
) {
1572 r
= log_level_from_string(value
);
1574 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1576 s
->max_level_syslog
= r
;
1577 } else if (streq(key
, "systemd.journald.max_level_kmsg") && value
) {
1578 r
= log_level_from_string(value
);
1580 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1582 s
->max_level_kmsg
= r
;
1583 } else if (streq(key
, "systemd.journald.max_level_wall") && value
) {
1584 r
= log_level_from_string(value
);
1586 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1588 s
->max_level_wall
= r
;
1589 } else if (startswith(key
, "systemd.journald"))
1590 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1592 /* do not warn about state here, since probably systemd already did */
1596 static int server_parse_config_file(Server
*s
) {
1599 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1600 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1602 config_item_perf_lookup
, journald_gperf_lookup
,
1606 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1607 Server
*s
= userdata
;
1615 int server_schedule_sync(Server
*s
, int priority
) {
1620 if (priority
<= LOG_CRIT
) {
1621 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1626 if (s
->sync_scheduled
)
1629 if (s
->sync_interval_usec
> 0) {
1632 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1636 when
+= s
->sync_interval_usec
;
1638 if (!s
->sync_event_source
) {
1639 r
= sd_event_add_time(
1641 &s
->sync_event_source
,
1644 server_dispatch_sync
, s
);
1648 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1650 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1654 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1659 s
->sync_scheduled
= true;
1665 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1666 Server
*s
= userdata
;
1670 server_cache_hostname(s
);
1674 static int server_open_hostname(Server
*s
) {
1679 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1680 if (s
->hostname_fd
< 0)
1681 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1683 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1685 /* kernels prior to 3.2 don't support polling this file. Ignore
1688 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1689 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1693 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1696 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1698 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1703 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1704 Server
*s
= userdata
;
1708 assert(s
->notify_event_source
== es
);
1709 assert(s
->notify_fd
== fd
);
1711 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1712 * message on it. Either it's the watchdog event, the initial
1713 * READY=1 event or an stdout stream event. If there's nothing
1714 * to write anymore, turn our event source off. The next time
1715 * there's something to send it will be turned on again. */
1717 if (!s
->sent_notify_ready
) {
1718 static const char p
[] =
1720 "STATUS=Processing requests...";
1723 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1725 if (errno
== EAGAIN
)
1728 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1731 s
->sent_notify_ready
= true;
1732 log_debug("Sent READY=1 notification.");
1734 } else if (s
->send_watchdog
) {
1736 static const char p
[] =
1741 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1743 if (errno
== EAGAIN
)
1746 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1749 s
->send_watchdog
= false;
1750 log_debug("Sent WATCHDOG=1 notification.");
1752 } else if (s
->stdout_streams_notify_queue
)
1753 /* Dispatch one stream notification event */
1754 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1756 /* Leave us enabled if there's still more to do. */
1757 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1760 /* There was nothing to do anymore, let's turn ourselves off. */
1761 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1763 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1768 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1769 Server
*s
= userdata
;
1774 s
->send_watchdog
= true;
1776 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1778 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1780 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1782 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1784 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1786 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1791 static int server_connect_notify(Server
*s
) {
1792 union sockaddr_union sa
= {
1793 .un
.sun_family
= AF_UNIX
,
1799 assert(s
->notify_fd
< 0);
1800 assert(!s
->notify_event_source
);
1803 So here's the problem: we'd like to send notification
1804 messages to PID 1, but we cannot do that via sd_notify(),
1805 since that's synchronous, and we might end up blocking on
1806 it. Specifically: given that PID 1 might block on
1807 dbus-daemon during IPC, and dbus-daemon is logging to us,
1808 and might hence block on us, we might end up in a deadlock
1809 if we block on sending PID 1 notification messages — by
1810 generating a full blocking circle. To avoid this, let's
1811 create a non-blocking socket, and connect it to the
1812 notification socket, and then wait for POLLOUT before we
1813 send anything. This should efficiently avoid any deadlocks,
1814 as we'll never block on PID 1, hence PID 1 can safely block
1815 on dbus-daemon which can safely block on us again.
1817 Don't think that this issue is real? It is, see:
1818 https://github.com/systemd/systemd/issues/1505
1821 e
= getenv("NOTIFY_SOCKET");
1825 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1826 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1830 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1831 log_error("NOTIFY_SOCKET path too long: %s", e
);
1835 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1836 if (s
->notify_fd
< 0)
1837 return log_error_errno(errno
, "Failed to create notify socket: %m");
1839 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1841 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1842 if (sa
.un
.sun_path
[0] == '@')
1843 sa
.un
.sun_path
[0] = 0;
1845 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1847 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1849 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1851 return log_error_errno(r
, "Failed to watch notification socket: %m");
1853 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1854 s
->send_watchdog
= true;
1856 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1858 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1861 /* This should fire pretty soon, which we'll use to send the
1867 int server_init(Server
*s
) {
1868 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1875 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1879 s
->watchdog_usec
= USEC_INFINITY
;
1881 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1882 s
->sync_scheduled
= false;
1884 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1885 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1887 s
->forward_to_wall
= true;
1889 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1891 s
->max_level_store
= LOG_DEBUG
;
1892 s
->max_level_syslog
= LOG_DEBUG
;
1893 s
->max_level_kmsg
= LOG_NOTICE
;
1894 s
->max_level_console
= LOG_INFO
;
1895 s
->max_level_wall
= LOG_EMERG
;
1897 journal_reset_metrics(&s
->system_storage
.metrics
);
1898 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1900 server_parse_config_file(s
);
1901 parse_proc_cmdline(parse_proc_cmdline_item
, s
, true);
1903 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1904 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1905 s
->rate_limit_interval
, s
->rate_limit_burst
);
1906 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1909 (void) mkdir_p("/run/systemd/journal", 0755);
1911 s
->user_journals
= ordered_hashmap_new(NULL
);
1912 if (!s
->user_journals
)
1915 s
->mmap
= mmap_cache_new();
1919 s
->deferred_closes
= set_new(NULL
);
1920 if (!s
->deferred_closes
)
1923 r
= sd_event_default(&s
->event
);
1925 return log_error_errno(r
, "Failed to create event loop: %m");
1927 n
= sd_listen_fds(true);
1929 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1931 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1933 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1935 if (s
->native_fd
>= 0) {
1936 log_error("Too many native sockets passed.");
1942 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1944 if (s
->stdout_fd
>= 0) {
1945 log_error("Too many stdout sockets passed.");
1951 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1952 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1954 if (s
->syslog_fd
>= 0) {
1955 log_error("Too many /dev/log sockets passed.");
1961 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1963 if (s
->audit_fd
>= 0) {
1964 log_error("Too many audit sockets passed.");
1978 r
= fdset_put(fds
, fd
);
1984 /* Try to restore streams, but don't bother if this fails */
1985 (void) server_restore_streams(s
, fds
);
1987 if (fdset_size(fds
) > 0) {
1988 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1989 fds
= fdset_free(fds
);
1992 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1994 /* always open stdout, syslog, native, and kmsg sockets */
1996 /* systemd-journald.socket: /run/systemd/journal/stdout */
1997 r
= server_open_stdout_socket(s
);
2001 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2002 r
= server_open_syslog_socket(s
);
2006 /* systemd-journald.socket: /run/systemd/journal/socket */
2007 r
= server_open_native_socket(s
);
2012 r
= server_open_dev_kmsg(s
);
2016 /* Unless we got *some* sockets and not audit, open audit socket */
2017 if (s
->audit_fd
>= 0 || no_sockets
) {
2018 r
= server_open_audit(s
);
2023 r
= server_open_kernel_seqnum(s
);
2027 r
= server_open_hostname(s
);
2031 r
= setup_signals(s
);
2035 s
->udev
= udev_new();
2039 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
2043 r
= cg_get_root_path(&s
->cgroup_root
);
2047 server_cache_hostname(s
);
2048 server_cache_boot_id(s
);
2049 server_cache_machine_id(s
);
2051 s
->runtime_storage
.name
= "Runtime journal";
2052 s
->system_storage
.name
= "System journal";
2054 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
2055 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
2056 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2059 (void) server_connect_notify(s
);
2061 return system_journal_open(s
, false);
2064 void server_maybe_append_tags(Server
*s
) {
2070 n
= now(CLOCK_REALTIME
);
2072 if (s
->system_journal
)
2073 journal_file_maybe_append_tag(s
->system_journal
, n
);
2075 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2076 journal_file_maybe_append_tag(f
, n
);
2080 void server_done(Server
*s
) {
2084 if (s
->deferred_closes
) {
2085 journal_file_close_set(s
->deferred_closes
);
2086 set_free(s
->deferred_closes
);
2089 while (s
->stdout_streams
)
2090 stdout_stream_free(s
->stdout_streams
);
2092 if (s
->system_journal
)
2093 (void) journal_file_close(s
->system_journal
);
2095 if (s
->runtime_journal
)
2096 (void) journal_file_close(s
->runtime_journal
);
2098 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
2099 (void) journal_file_close(f
);
2101 ordered_hashmap_free(s
->user_journals
);
2103 sd_event_source_unref(s
->syslog_event_source
);
2104 sd_event_source_unref(s
->native_event_source
);
2105 sd_event_source_unref(s
->stdout_event_source
);
2106 sd_event_source_unref(s
->dev_kmsg_event_source
);
2107 sd_event_source_unref(s
->audit_event_source
);
2108 sd_event_source_unref(s
->sync_event_source
);
2109 sd_event_source_unref(s
->sigusr1_event_source
);
2110 sd_event_source_unref(s
->sigusr2_event_source
);
2111 sd_event_source_unref(s
->sigterm_event_source
);
2112 sd_event_source_unref(s
->sigint_event_source
);
2113 sd_event_source_unref(s
->sigrtmin1_event_source
);
2114 sd_event_source_unref(s
->hostname_event_source
);
2115 sd_event_source_unref(s
->notify_event_source
);
2116 sd_event_source_unref(s
->watchdog_event_source
);
2117 sd_event_unref(s
->event
);
2119 safe_close(s
->syslog_fd
);
2120 safe_close(s
->native_fd
);
2121 safe_close(s
->stdout_fd
);
2122 safe_close(s
->dev_kmsg_fd
);
2123 safe_close(s
->audit_fd
);
2124 safe_close(s
->hostname_fd
);
2125 safe_close(s
->notify_fd
);
2128 journal_rate_limit_free(s
->rate_limit
);
2130 if (s
->kernel_seqnum
)
2131 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2135 free(s
->cgroup_root
);
2136 free(s
->hostname_field
);
2139 mmap_cache_unref(s
->mmap
);
2141 udev_unref(s
->udev
);
2144 static const char* const storage_table
[_STORAGE_MAX
] = {
2145 [STORAGE_AUTO
] = "auto",
2146 [STORAGE_VOLATILE
] = "volatile",
2147 [STORAGE_PERSISTENT
] = "persistent",
2148 [STORAGE_NONE
] = "none"
2151 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2152 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
2154 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2155 [SPLIT_LOGIN
] = "login",
2156 [SPLIT_UID
] = "uid",
2157 [SPLIT_NONE
] = "none",
2160 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2161 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");