1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <selinux/selinux.h>
24 #include <sys/ioctl.h>
26 #include <sys/signalfd.h>
27 #include <sys/statvfs.h>
28 #include <linux/sockios.h>
31 #include "sd-daemon.h"
32 #include "sd-journal.h"
33 #include "sd-messages.h"
36 #include "alloc-util.h"
37 #include "audit-util.h"
38 #include "cgroup-util.h"
39 #include "conf-parser.h"
40 #include "dirent-util.h"
41 #include "extract-word.h"
44 #include "format-util.h"
47 #include "hostname-util.h"
48 #include "id128-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-context.h"
56 #include "journald-kmsg.h"
57 #include "journald-native.h"
58 #include "journald-rate-limit.h"
59 #include "journald-server.h"
60 #include "journald-stream.h"
61 #include "journald-syslog.h"
65 #include "parse-util.h"
66 #include "proc-cmdline.h"
67 #include "process-util.h"
69 #include "selinux-util.h"
70 #include "signal-util.h"
71 #include "socket-util.h"
72 #include "stdio-util.h"
73 #include "string-table.h"
74 #include "string-util.h"
75 #include "syslog-util.h"
76 #include "user-util.h"
/* Cap on simultaneously open per-user journal files: find_journal() closes entries taken from the front of
 * s->user_journals while the map holds this many or more. */
78 #define USER_JOURNALS_MAX 1024
/* Built-in defaults. NOTE(review): the code consuming these four values is not part of the visible listing. */
80 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
81 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
82 #define DEFAULT_RATE_LIMIT_BURST 1000
83 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* cache_space_refresh() compares the cache timestamp plus this period against the current monotonic time. */
85 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
/* 8 MiB. NOTE(review): presumably a socket send buffer size; its user is not visible here, confirm. */
87 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
89 /* The period to insert between posting changes for coalescing */
90 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
92 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
93 * for a bit of additional metadata. */
94 #define DEFAULT_LINE_MAX (48*1024)
/* Reports disk usage figures for the journal directory `path`.
 *
 * *ret_free is set to f_bsize * f_bavail as reported by fstatvfs() on the directory. *ret_used is increased by
 * st_blocks * 512 for directory entries; the visible checks test for names ending in ".journal" or ".journal~" and
 * for S_ISREG(), and entries are stat'ed with AT_SYMLINK_NOFOLLOW. A failing fstatat() on an entry is only logged at
 * debug level ("ignoring").
 *
 * The "Failed to open" path logs at LOG_DEBUG for ENOENT and LOG_ERR otherwise; the fstatvfs() failure is logged as
 * an error. Both return the value of the logging call.
 *
 * NOTE(review): the statements controlled by the name and S_ISREG() checks are not visible in this listing;
 * presumably non-matching entries are skipped. The parameter `s` is not referenced in the visible lines. */
96 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
97 _cleanup_closedir_
DIR *d
= NULL
;
106 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
107 errno
, "Failed to open %s: %m", path
);
109 if (fstatvfs(dirfd(d
), &ss
) < 0)
110 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
112 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
114 FOREACH_DIRENT_ALL(de
, d
, break) {
117 if (!endswith(de
->d_name
, ".journal") &&
118 !endswith(de
->d_name
, ".journal~"))
121 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
122 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
126 if (!S_ISREG(st
.st_mode
))
129 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
135 static void cache_space_invalidate(JournalStorageSpace
*space
) {
136 memset(space
, 0, sizeof(*space
));
/* Recomputes the cached space figures of `storage` (storage->space) for the directory storage->path.
 *
 * The current CLOCK_MONOTONIC time is compared against space->timestamp + RECHECK_SPACE_USEC, but only if the
 * timestamp is non-zero. After determine_path_usage() the following fields are stored:
 *
 *   vfs_used, vfs_available: the two values reported by determine_path_usage()
 *   limit:     MIN(MAX(vfs_used + LESS_BY(vfs_avail, keep_free), min_use), max_use)
 *   available: LESS_BY(limit, vfs_used)
 *   timestamp: the monotonic time taken at entry
 *
 * NOTE(review): the statement controlled by the timestamp check and the handling of the determine_path_usage()
 * result are not visible in this listing; presumably the function returns early while the cache is still fresh and
 * propagates errors. */
139 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
140 JournalStorageSpace
*space
;
141 JournalMetrics
*metrics
;
142 uint64_t vfs_used
, vfs_avail
, avail
;
148 metrics
= &storage
->metrics
;
149 space
= &storage
->space
;
151 ts
= now(CLOCK_MONOTONIC
);
153 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
156 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
160 space
->vfs_used
= vfs_used
;
161 space
->vfs_available
= vfs_avail
;
163 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
165 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
166 space
->available
= LESS_BY(space
->limit
, vfs_used
);
167 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to storage->space.vfs_used when the latter is larger; the value is never lowered
 * here. In the visible code it is called right after cache_space_refresh() when a journal is opened, and from the
 * SIGUSR2 handler. */
171 static void patch_min_use(JournalStorage
*storage
) {
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
180 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Refreshes the cached space data of the active storage and hands out its `available` and `limit` values.
 *
 * The active storage is s->system_storage when s->system_journal is set and s->runtime_storage otherwise.
 *
 * NOTE(review): server_dispatch_message() passes NULL for `limit`, so the two stores are presumably guarded by
 * NULL checks; those guards and the handling of the cache_space_refresh() result are not visible in this listing. */
184 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
190 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
192 r
= cache_space_refresh(s
, js
);
195 *available
= js
->space
.available
;
197 *limit
= js
->space
.limit
;
/* Emits a driver message (MESSAGE_ID SD_MESSAGE_JOURNAL_USAGE_STR) describing the space situation of a storage.
 *
 * The human-readable text reports name, path, current use (vfs_used), limit and available space. The structured
 * fields carry name, path, and for each of current use, max_use, keep_free, vfs_available, limit and available both
 * the raw number (PRIu64) and a format_bytes() rendering.
 *
 * NOTE(review): the listing shows `storage` being assigned from s->system_journal ? system_storage :
 * runtime_storage, and dispatch_sigusr1() passes NULL; presumably the assignment is the fallback for a NULL
 * argument, but the guard is not visible. What happens when cache_space_refresh() fails is not visible either. */
202 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
203 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
204 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
205 JournalMetrics
*metrics
;
210 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
212 if (cache_space_refresh(s
, storage
) < 0)
215 metrics
= &storage
->metrics
;
216 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
217 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
218 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
219 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
220 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
221 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
223 server_driver_message(s
, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
227 "JOURNAL_NAME=%s", storage
->name
,
228 "JOURNAL_PATH=%s", storage
->path
,
229 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
230 "CURRENT_USE_PRETTY=%s", fb1
,
231 "MAX_USE=%"PRIu64
, metrics
->max_use
,
232 "MAX_USE_PRETTY=%s", fb2
,
233 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
235 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
236 "DISK_AVAILABLE_PRETTY=%s", fb4
,
237 "LIMIT=%"PRIu64
, storage
->space
.limit
,
238 "LIMIT_PRETTY=%s", fb5
,
239 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
240 "AVAILABLE_PRETTY=%s", fb6
,
/* Applies the per-user ACL to an open journal file via add_acls_for_user(f->fd, uid). A failure is only logged as a
 * warning ("ignoring"); nothing is reported to the caller (void).
 *
 * NOTE(review): the statement controlled by "uid <= SYSTEM_UID_MAX" is not visible in this listing; presumably
 * system users are skipped. */
244 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
251 if (uid
<= SYSTEM_UID_MAX
)
254 r
= add_acls_for_user(f
->fd
, uid
);
256 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Opens the journal file `fname` and arms its post-change timer.
 *
 * The file is opened with mode 0640 and the server's compress, mmap and deferred_closes settings, through either
 * journal_file_open_reliably() or journal_file_open(). On the opened file
 * journal_file_enable_post_change_timer() is called with POST_CHANGE_TIMER_INTERVAL_USEC on s->event.
 *
 * NOTE(review): most of the parameter list, the condition selecting between the two open calls, and the condition
 * under which journal_file_close(f) runs are not visible in this listing. Judging from the callers
 * (open_journal(s, true|false, fn, flags, seal, &metrics, &ret)) the second argument presumably selects the
 * "reliably" variant, and the close presumably belongs to the timer-failure path; confirm. */
260 static int open_journal(
266 JournalMetrics
*metrics
,
276 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
278 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
282 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
284 (void) journal_file_close(f
);
/* Tells whether the flag file /run/systemd/journal/flushed exists, i.e. whether access(F_OK) on it succeeds. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
/* Makes sure the system and/or runtime journal are open, as far as the configured storage mode allows.
 *
 * System journal: attempted only if it is not open yet, s->storage is STORAGE_PERSISTENT or STORAGE_AUTO, and either
 * `flush_requested` is true or the "flushed" flag file exists. /var/log/journal/ is created (mkdir_p) in persistent
 * mode only; the storage path itself is created with a plain mkdir(). The file opened is
 * <system_storage.path>/system.journal with O_RDWR|O_CREAT and s->seal. The visible code then sets ACLs, refreshes
 * the space cache and bumps min_use. Open failures other than -ENOENT/-EROFS are logged as warnings. When
 * `flush_requested` is false, server_flush_to_var(s, true) is invoked (see the in-line comment about recovering
 * from a failed rotate).
 *
 * Runtime journal: attempted only if it is not open yet and s->storage is not STORAGE_NONE. With the system journal
 * open, <runtime_storage.path>/system.journal is opened without O_CREAT. The O_CREAT variant is preceded by creating
 * /run/log, /run/log/journal and the parents of the file; its failure is returned via log_error_errno(). A runtime
 * journal that ends up open gets ACLs, a space cache refresh and the min_use bump.
 *
 * NOTE(review): several conditions, branch boundaries and return statements are not visible in this listing. */
296 static int system_journal_open(Server
*s
, bool flush_requested
) {
300 if (!s
->system_journal
&&
301 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
302 (flush_requested
|| flushed_flag_is_set())) {
304 /* If in auto mode: first try to create the machine
305 * path, but not the prefix.
307 * If in persistent mode: create /var/log/journal and
308 * the machine path */
310 if (s
->storage
== STORAGE_PERSISTENT
)
311 (void) mkdir_p("/var/log/journal/", 0755);
313 (void) mkdir(s
->system_storage
.path
, 0755);
315 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
316 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
318 server_add_acls(s
->system_journal
, 0);
319 (void) cache_space_refresh(s
, &s
->system_storage
);
320 patch_min_use(&s
->system_storage
);
322 if (!IN_SET(r
, -ENOENT
, -EROFS
))
323 log_warning_errno(r
, "Failed to open system journal: %m");
328 /* If the runtime journal is open, and we're post-flush, we're
329 * recovering from a failed system journal rotate (ENOSPC)
330 * for which the runtime journal was reopened.
332 * Perform an implicit flush to var, leaving the runtime
333 * journal closed, now that the system journal is back.
335 if (!flush_requested
)
336 (void) server_flush_to_var(s
, true);
339 if (!s
->runtime_journal
&&
340 (s
->storage
!= STORAGE_NONE
)) {
342 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
344 if (s
->system_journal
) {
346 /* Try to open the runtime journal, but only
347 * if it already exists, so that we can flush
348 * it into the system journal */
350 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
353 log_warning_errno(r
, "Failed to open runtime journal: %m");
360 /* OK, we really need the runtime journal, so create
361 * it if necessary. */
363 (void) mkdir("/run/log", 0755);
364 (void) mkdir("/run/log/journal", 0755);
365 (void) mkdir_parents(fn
, 0750);
367 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
369 return log_error_errno(r
, "Failed to open runtime journal: %m");
372 if (s
->runtime_journal
) {
373 server_add_acls(s
->runtime_journal
, 0);
374 (void) cache_space_refresh(s
, &s
->runtime_storage
);
375 patch_min_use(&s
->runtime_storage
);
/* Picks the journal file an entry attributed to `uid` is written to.
 *
 * system_journal_open(s, false) is called first (result ignored) so that journals left NULL are retried. Then, in
 * order:
 *
 *   - an open runtime journal is returned for every uid;
 *   - uids <= SYSTEM_UID_MAX and dynamic uids (uid_is_dynamic()) get the system journal;
 *   - otherwise s->user_journals is consulted (keyed by UID_TO_PTR(uid)), and the file
 *     /var/log/journal/<machine-id>/user-<uid>.journal is opened with O_RDWR|O_CREAT, given ACLs for `uid` and put
 *     into the map. Before opening, files are taken from the front of the map and closed while it holds
 *     USER_JOURNALS_MAX entries or more.
 *
 * The system journal is the visible fallback on the asprintf() failure path and on the path that closes the file
 * after ordered_hashmap_put().
 *
 * NOTE(review): the conditions guarding several "return s->system_journal" statements and the return for a map
 * hit are not visible in this listing. */
382 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
383 _cleanup_free_
char *p
= NULL
;
390 /* A rotate that fails to create the new journal (ENOSPC) leaves the
391 * rotated journal as NULL. Unless we revisit opening, even after
392 * space is made available we'll continue to return NULL indefinitely.
394 * system_journal_open() is a noop if the journals are already open, so
395 * we can just call it here to recover from failed rotates (or anything
396 * else that's left the journals as NULL).
398 * Fixes https://github.com/systemd/systemd/issues/3968 */
399 (void) system_journal_open(s
, false);
401 /* We split up user logs only on /var, not on /run. If the
402 * runtime file is open, we write to it exclusively, in order
403 * to guarantee proper order as soon as we flush /run to
404 * /var and close the runtime file. */
406 if (s
->runtime_journal
)
407 return s
->runtime_journal
;
409 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
410 return s
->system_journal
;
412 r
= sd_id128_get_machine(&machine
);
414 return s
->system_journal
;
416 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
420 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
421 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
422 return s
->system_journal
;
424 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
425 /* Too many open? Then let's close one */
426 f
= ordered_hashmap_steal_first(s
->user_journals
);
428 (void) journal_file_close(f
);
431 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
433 return s
->system_journal
;
435 server_add_acls(f
, uid
);
437 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
439 (void) journal_file_close(f
);
440 return s
->system_journal
;
/* Rotates one journal file through journal_file_rotate(f, s->compress, seal, s->deferred_closes) and applies the
 * ACLs for `uid` to *f afterwards.
 *
 * Two error messages are visible: "Failed to rotate <path>" (which dereferences *f) and "Failed to create new
 * <name> journal".
 *
 * NOTE(review): the parameter list and the conditions selecting between the two messages are not visible in this
 * listing. Callers pass (s, &file, "runtime"|"system"|"user", seal, uid); presumably the second message is used
 * when the rotation left *f NULL; confirm. */
446 static int do_rotate(
459 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
462 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
464 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
466 server_add_acls(*f
, uid
);
/* Rotates all journals of the server: the runtime journal (never sealed), the system journal (sealed per s->seal)
 * and every entry of s->user_journals (sealed per s->seal, with the uid recovered from the map key). Results of the
 * first two rotations are ignored.
 *
 * For user journals the map entry is either replaced with the file handed back by do_rotate() or removed (see the
 * "Old file has been closed and deallocated" comment). Finally, every file in s->deferred_closes that is no longer
 * offlining is removed from the set and closed.
 *
 * NOTE(review): the condition choosing between ordered_hashmap_replace() and ordered_hashmap_remove() is not
 * visible in this listing; presumably it is the success of do_rotate(). */
471 void server_rotate(Server
*s
) {
477 log_debug("Rotating...");
479 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
480 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
482 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
483 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
485 ordered_hashmap_replace(s
->user_journals
, k
, f
);
487 /* Old file has been closed and deallocated */
488 ordered_hashmap_remove(s
->user_journals
, k
);
491 /* Perform any deferred closes which aren't still offlining. */
492 SET_FOREACH(f
, s
->deferred_closes
, i
)
493 if (!journal_file_is_offlining(f
)) {
494 (void) set_remove(s
->deferred_closes
, f
);
495 (void) journal_file_close(f
);
/* Syncs the journals by calling journal_file_set_offline(file, false) on the system journal (if open) and on every
 * user journal, then switches s->sync_event_source to SD_EVENT_OFF (if it exists) and clears s->sync_scheduled.
 *
 * Sync failures are logged as warnings ("ignoring"); a failure to disable the timer source is logged as an error.
 * NOTE(review): the "if (r < 0)" guards in front of the log calls are not visible in this listing. */
499 void server_sync(Server
*s
) {
504 if (s
->system_journal
) {
505 r
= journal_file_set_offline(s
->system_journal
, false);
507 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
510 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
511 r
= journal_file_set_offline(f
, false);
513 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
516 if (s
->sync_event_source
) {
517 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
519 log_error_errno(r
, "Failed to disable sync timer source: %m");
522 s
->sync_scheduled
= false;
/* Vacuums the journal directory of one storage.
 *
 * The space cache is refreshed first (result ignored). journal_directory_vacuum() is then run on storage->path
 * with the cached limit, the storage's n_max_files and s->max_retention_usec, and it updates s->oldest_file_usec.
 * Errors other than -ENOENT are logged as warnings ("ignoring"). The space cache is invalidated afterwards, so
 * the next refresh recomputes it.
 *
 * NOTE(review): the condition in front of server_space_usage_message() is not visible in this listing; presumably
 * it is `verbose`. */
525 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
532 (void) cache_space_refresh(s
, storage
);
535 server_space_usage_message(s
, storage
);
537 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
538 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
539 &s
->oldest_file_usec
, verbose
);
540 if (r
< 0 && r
!= -ENOENT
)
541 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
543 cache_space_invalidate(&storage
->space
);
/* Vacuums the storages whose journal is currently open: s->system_storage if s->system_journal is set and
 * s->runtime_storage if s->runtime_journal is set. s->oldest_file_usec is reset to 0 beforehand and is filled in
 * again by do_vacuum(). `verbose` is passed through unchanged.
 *
 * NOTE(review): the return statement is not visible in this listing. */
546 int server_vacuum(Server
*s
, bool verbose
) {
549 log_debug("Vacuuming...");
551 s
->oldest_file_usec
= 0;
553 if (s
->system_journal
)
554 do_vacuum(s
, &s
->system_storage
, verbose
);
555 if (s
->runtime_journal
)
556 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Fills s->machine_id_field with "_MACHINE_ID=" followed by the string form of the machine ID obtained from
 * sd_id128_get_machine(). dispatch_message_real() appends this field to every entry when it is non-empty.
 *
 * NOTE(review): the handling of a failing sd_id128_get_machine() is not visible in this listing. */
561 static void server_cache_machine_id(Server
*s
) {
567 r
= sd_id128_get_machine(&id
);
571 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Fills s->boot_id_field with "_BOOT_ID=" followed by the string form of the boot ID obtained from
 * sd_id128_get_boot(). dispatch_message_real() appends this field to every entry when it is non-empty.
 *
 * NOTE(review): the handling of a failing sd_id128_get_boot() is not visible in this listing. */
574 static void server_cache_boot_id(Server
*s
) {
580 r
= sd_id128_get_boot(&id
);
584 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=" followed by the result of gethostname_malloc(). The previous field
 * string is freed and replaced by the newly allocated one; the temporary hostname `t` is released automatically
 * (_cleanup_free_).
 *
 * NOTE(review): the checks for a failing gethostname_malloc() or strappend() are not visible in this listing. */
587 static void server_cache_hostname(Server
*s
) {
588 _cleanup_free_
char *t
= NULL
;
593 t
= gethostname_malloc();
597 x
= strappend("_HOSTNAME=", t
);
601 free(s
->hostname_field
);
602 s
->hostname_field
= x
;
/* Classifies the error `r` of a failed write to journal file `f` and logs why a rotation is warranted.
 *
 * Error codes handled, grouped by the message they share:
 *   -E2BIG, -EFBIG, -EDQUOT, -ENOSPC   allocation limit reached (debug)
 *   -EIO                               I/O error (warning)
 *   -EHOSTDOWN                         file from another machine (info)
 *   -EBUSY                             unclean shutdown (info)
 *   -EPROTONOSUPPORT                   unsupported feature (info)
 *   -EBADMSG, -ENODATA, -ESHUTDOWN     corrupted, truncated or already archived (warning)
 *   -EIDRM                             file has been deleted (warning)
 *   -ETXTBSY                           file is from the future (warning)
 *
 * Callers treat a false result as "give up on this entry" (see write_to_journal() and server_flush_to_var()).
 * NOTE(review): the return statements and the default case are not visible in this listing; presumably the listed
 * cases return true and everything else false. */
605 static bool shall_try_append_again(JournalFile
*f
, int r
) {
608 case -E2BIG
: /* Hit configured limit */
609 case -EFBIG
: /* Hit fs limit */
610 case -EDQUOT
: /* Quota limit hit */
611 case -ENOSPC
: /* Disk full */
612 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
615 case -EIO
: /* I/O error of some kind (mmap) */
616 log_warning("%s: IO error, rotating.", f
->path
);
619 case -EHOSTDOWN
: /* Other machine */
620 log_info("%s: Journal file from other machine, rotating.", f
->path
);
623 case -EBUSY
: /* Unclean shutdown */
624 log_info("%s: Unclean shutdown, rotating.", f
->path
);
627 case -EPROTONOSUPPORT
: /* Unsupported feature */
628 log_info("%s: Unsupported feature, rotating.", f
->path
);
631 case -EBADMSG
: /* Corrupted */
632 case -ENODATA
: /* Truncated */
633 case -ESHUTDOWN
: /* Already archived */
634 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
637 case -EIDRM
: /* Journal file has been deleted */
638 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
641 case -ETXTBSY
: /* Journal file is from the future */
642 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
/* Appends one entry, given as `n` iovec items, to the journal selected for `uid`.
 *
 * The entry is stamped with the event loop's current realtime and monotonic time (sd_event_now()), not with the
 * sender's time. A realtime clock that went backwards relative to s->last_realtime_clock, or a file for which
 * journal_file_rotate_suggested() says so, is noted for rotation; the visible code then vacuums and looks the
 * journal up again via find_journal().
 *
 * After a successful append a sync is scheduled according to `priority`. After a failed append the entry is
 * dropped with an error message if vacuuming already took place or shall_try_append_again() declines; otherwise
 * the code vacuums, looks the journal up again and retries the append once.
 *
 * `n` is unsigned, hence the item count in the two error messages is printed with %u.
 *
 * NOTE(review): the rotation calls, the NULL checks on `f` and the "if (r >= 0)" guards are not visible in this
 * listing. */
650 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
651 bool vacuumed
= false, rotate
= false;
652 struct dual_timestamp ts
;
660 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
661 * the source time, and not even the time the event was originally seen, but instead simply the time we started
662 * processing it, as we want strictly linear ordering in what we write out.) */
663 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
664 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
666 if (ts
.realtime
< s
->last_realtime_clock
) {
667 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
668 * regular operation. However, when it does happen, then we should make sure that we start fresh files
669 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
670 * bisection works correctly. */
672 log_debug("Time jumped backwards, rotating.");
676 f
= find_journal(s
, uid
);
680 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
681 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
688 server_vacuum(s
, false);
691 f
= find_journal(s
, uid
);
696 s
->last_realtime_clock
= ts
.realtime
;
698 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
700 server_schedule_sync(s
, priority
);
704 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
705 log_error_errno(r
, "Failed to write entry (%u items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
710 server_vacuum(s
, false);
712 f
= find_journal(s
, uid
);
716 log_debug("Retrying write.");
717 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
719 log_error_errno(r
, "Failed to write entry (%u items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
721 server_schedule_sync(s
, priority
);
/* Helpers that append one "FIELD=value" string to iovec[] and advance n. Each one builds the string in a newa() or
 * strjoina() buffer `k` and is a no-op when its validity test fails. All of them add the "=" separator themselves,
 * so `field` must be passed as the bare field name, as a string literal.
 *
 *   IOVEC_ADD_NUMERIC_FIELD  formats `value` with `format` if isset(value) holds; the buffer is sized with
 *                            DECIMAL_STR_MAX(type)
 *   IOVEC_ADD_STRING_FIELD   appends `value` if it is not empty (isempty())
 *   IOVEC_ADD_ID128_FIELD    appends the string form of a 128-bit ID unless it is the null ID
 *   IOVEC_ADD_SIZED_FIELD    appends `value_size` bytes of `value` plus a terminating NUL if value_size > 0
 *
 * NOTE(review): the declaration of `k` and the closing braces of the macros are not visible in this listing. */
724 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
725 if (isset(value)) { \
727 k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
728 sprintf(k, field "=" format, value); \
729 iovec[n++] = IOVEC_MAKE_STRING(k); \
732 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
733 if (!isempty(value)) { \
735 k = strjoina(field "=", value); \
736 iovec[n++] = IOVEC_MAKE_STRING(k); \
739 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
740 if (!sd_id128_is_null(value)) { \
742 k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
743 sd_id128_to_string(value, stpcpy(k, field "=")); \
744 iovec[n++] = IOVEC_MAKE_STRING(k); \
747 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
748 if (value_size > 0) { \
750 k = newa(char, strlen(field "=") + value_size + 1); \
751 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
752 iovec[n++] = IOVEC_MAKE_STRING(k); \
/* Completes an entry with trusted metadata and writes it out.
 *
 * `iovec` holds `n` fields already and has room for `m` in total; the visible assertion checks that the meta
 * fields, the object fields (only for a valid `object_pid`) and the client's extra fields still fit. Fields are
 * appended in this order:
 *
 *   1. from the client context `c`: _PID, _UID, _GID, _COMM, _EXE, _CMDLINE, _CAP_EFFECTIVE, _SELINUX_CONTEXT,
 *      _AUDIT_SESSION, _AUDIT_LOGINUID, _SYSTEMD_CGROUP, _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT,
 *      _SYSTEMD_USER_UNIT, _SYSTEMD_SLICE, _SYSTEMD_USER_SLICE, _SYSTEMD_INVOCATION_ID, followed by a copy of
 *      c->extra_fields_iovec;
 *   2. if `object_pid` is valid and client_context_get() succeeds for it: the same set with an OBJECT_ prefix
 *      and without the leading underscore;
 *   3. _SOURCE_REALTIME_TIMESTAMP from `tv`;
 *   4. the cached boot ID, machine ID and hostname fields, each only if non-empty.
 *
 * The target journal uid is c->uid in SPLIT_UID mode, and c->owner_uid in SPLIT_LOGIN mode when c->uid is not root
 * and the owner uid is valid. The entry is then passed to write_to_journal().
 *
 * NOTE(review): parts of the parameter list, the guards on `c` and `tv`, and the fallback assignment of
 * journal_uid are not visible in this listing. */
755 static void dispatch_message_real(
757 struct iovec
*iovec
, size_t n
, size_t m
,
758 const ClientContext
*c
,
759 const struct timeval
*tv
,
763 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
771 N_IOVEC_META_FIELDS
+
772 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
773 client_context_extra_fields_n_iovec(c
) <= m
);
776 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
777 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
778 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
780 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
781 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
782 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
783 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
785 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
787 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
788 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
792 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
793 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
794 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
795 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
796 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
798 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
800 if (c
->extra_fields_n_iovec
> 0) {
801 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
802 n
+= c
->extra_fields_n_iovec
;
808 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
810 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
811 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
812 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
814 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
815 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
816 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
817 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
819 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
821 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
822 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
824 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
825 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
826 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
827 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
828 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
829 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
830 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
/* Pass the bare field name: IOVEC_ADD_ID128_FIELD() appends the "=" itself, so a trailing "=" here would yield
 * "OBJECT_SYSTEMD_INVOCATION_ID==<id>". */
832 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID");
838 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
839 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
842 /* Note that strictly speaking storing the boot id here is
843 * redundant since the entry includes this in-line
844 * anyway. However, we need this indexed, too. */
845 if (!isempty(s
->boot_id_field
))
846 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
848 if (!isempty(s
->machine_id_field
))
849 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
851 if (!isempty(s
->hostname_field
))
852 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
856 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
857 /* Split up strictly by (non-root) UID */
858 journal_uid
= c
->uid
;
859 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
860 /* Split up by login UIDs. We do this only if the
861 * realuid is not root, in order not to accidentally
862 * leak privileged information to the user that is
863 * logged by a privileged process that is part of an
864 * unprivileged session. */
865 journal_uid
= c
->owner_uid
;
869 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
/* Generates a journal entry on behalf of journald itself (_TRANSPORT=driver).
 *
 * The iovec is allocated on the stack with room for the meta fields, 5 fixed fields, the payload fields and the
 * extra fields of s->my_context. The fixed fields are SYSLOG_FACILITY=3 (LOG_DAEMON), SYSLOG_IDENTIFIER,
 * _TRANSPORT=driver, PRIORITY=6 (LOG_INFO) and `message_id`. `format` and the variadic arguments are expanded by
 * log_format_iovec() into further fields, and the result is dispatched with s->my_context as client context and
 * `object_pid` as object. Strings referenced by the iovec are freed afterwards (free(iovec[k++].iov_base)).
 *
 * If formatting fails, a replacement entry "MESSAGE=Entry printing failed: <error>" with PRIORITY=4 is dispatched
 * instead.
 *
 * NOTE(review): the loop bounds of the free() call, the guard on `message_id` (server_flush_to_var() passes NULL)
 * and the branch structure around the error path are not visible in this listing. */
872 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
882 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
);
883 iovec
= newa(struct iovec
, m
);
885 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
886 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
887 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
889 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
890 assert_cc(6 == LOG_INFO
);
891 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
894 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
897 va_start(ap
, format
);
898 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
899 /* Error handling below */
903 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
906 free(iovec
[k
++].iov_base
);
909 /* We failed to format the message. Emit a warning instead. */
912 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
915 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
916 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
917 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
/* Entry point for messages received from clients: filters, rate-limits and finally dispatches them.
 *
 * Visible checks: the priority is compared against s->max_level_store and the storage mode against STORAGE_NONE.
 * The rate limiter is then consulted with the client's unit, the priority masked with LOG_PRIMASK and the
 * available space reported by determine_space(). When messages were suppressed, a SD_MESSAGE_JOURNAL_DROPPED
 * driver message is generated whose text names the count and the unit, and which carries the count as a separate
 * N_DROPPED= field. N_DROPPED is passed as a plain format string, like the structured fields in
 * server_space_usage_message(); only the human-readable text belongs in LOG_MESSAGE().
 *
 * The message itself is handed to dispatch_message_real().
 *
 * NOTE(review): parts of the parameter list, the statements controlled by the two early checks, the guard on `c`
 * and the evaluation of `rl` are not visible in this listing. */
921 void server_dispatch_message(
923 struct iovec
*iovec
, size_t n
, size_t m
,
925 const struct timeval
*tv
,
929 uint64_t available
= 0;
933 assert(iovec
|| n
== 0);
938 if (LOG_PRI(priority
) > s
->max_level_store
)
941 /* Stop early in case the information will not be stored
943 if (s
->storage
== STORAGE_NONE
)
947 (void) determine_space(s
, &available
, NULL
);
949 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
953 /* Write a suppression message if we suppressed something */
955 server_driver_message(s
, c
->pid
,
956 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
957 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
958 "N_DROPPED=%i", rl
- 1,
962 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
/* Copies the contents of the runtime journal into the system journal and then closes and removes the runtime
 * journal.
 *
 * Visible preconditions: s->storage is STORAGE_AUTO or STORAGE_PERSISTENT, a runtime journal is open, and - if
 * `require_flag_file` is set - the "flushed" flag file exists. The system journal is (re)opened with
 * system_journal_open(s, true) and has to be available afterwards.
 *
 * The runtime journal is read through sd_journal_open(SD_JOURNAL_RUNTIME_ONLY) with the data threshold disabled.
 * Each entry is located with journal_file_move_to_object() and copied with journal_file_copy_entry(). If copying
 * fails with an error that shall_try_append_again() accepts, the code vacuums and retries the copy once. If the
 * system journal is gone at that point, the flush is given up with a notice.
 *
 * Afterwards journal_file_post_change() is called on the system journal, the runtime journal is closed,
 * /run/log/journal is removed recursively (REMOVE_ROOT) and a driver message reports the time spent and the
 * number of entries.
 *
 * NOTE(review): the early-return statements, the rotation call, the error labels and the return value are not
 * visible in this listing. */
965 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
967 sd_journal
*j
= NULL
;
968 char ts
[FORMAT_TIMESPAN_MAX
];
975 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
978 if (!s
->runtime_journal
)
981 if (require_flag_file
&& !flushed_flag_is_set())
984 (void) system_journal_open(s
, true);
986 if (!s
->system_journal
)
989 log_debug("Flushing to /var...");
991 start
= now(CLOCK_MONOTONIC
);
993 r
= sd_id128_get_machine(&machine
);
997 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
999 return log_error_errno(r
, "Failed to read runtime journal: %m");
1001 sd_journal_set_data_threshold(j
, 0);
1003 SD_JOURNAL_FOREACH(j
) {
1007 f
= j
->current_file
;
1008 assert(f
&& f
->current_offset
> 0);
1012 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1014 log_error_errno(r
, "Can't read entry: %m");
1018 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1022 if (!shall_try_append_again(s
->system_journal
, r
)) {
1023 log_error_errno(r
, "Can't write entry: %m");
1028 server_vacuum(s
, false);
1030 if (!s
->system_journal
) {
1031 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1036 log_debug("Retrying write.");
1037 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1039 log_error_errno(r
, "Can't write entry: %m");
1047 journal_file_post_change(s
->system_journal
);
1049 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1052 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1054 sd_journal_close(j
);
1056 server_driver_message(s
, 0, NULL
,
1057 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1058 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* Event loop callback for the datagram sockets: `fd` must be s->native_fd, s->syslog_fd or s->audit_fd (asserted),
 * and `userdata` is the Server.
 *
 * Steps visible in this listing:
 *   - any event mask other than exactly EPOLLIN is reported as an error;
 *   - the pending datagram size is queried with SIOCINQ (best effort) and s->buffer is grown with GREEDY_REALLOC()
 *     to a page-aligned size that also covers a netlink header plus MAX_AUDIT_MESSAGE_LENGTH;
 *   - the datagram is received with recvmsg(MSG_DONTWAIT|MSG_CMSG_CLOEXEC) into s->buffer, leaving one byte for a
 *     trailing NUL; the EINTR/EAGAIN case is tested separately from other errors, which are logged and returned;
 *   - control messages are decoded: SCM_CREDENTIALS -> ucred, SCM_SECURITY -> label/label_len, SO_TIMESTAMP -> tv,
 *     SCM_RIGHTS -> fds/n_fds (credentials and timestamp only if the length matches exactly);
 *   - syslog socket: non-empty datagrams without fds go to server_process_syslog_message() (whitespace-stripped);
 *     native socket: non-empty datagrams without fds go to server_process_native_message(), empty datagrams with
 *     exactly one fd go to server_process_native_file(); audit socket: non-empty datagrams without fds go to
 *     server_process_audit_message() together with the sender address. Other combinations produce a warning;
 *   - all received file descriptors are closed with close_many().
 *
 * NOTE(review): several declarations, the `control` union header, one MAX3() argument, the statement writing the
 * trailing NUL and the return statements are not visible in this listing. */
1065 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1066 Server
*s
= userdata
;
1067 struct ucred
*ucred
= NULL
;
1068 struct timeval
*tv
= NULL
;
1069 struct cmsghdr
*cmsg
;
1071 size_t label_len
= 0, m
;
1074 int *fds
= NULL
, v
= 0;
1078 struct cmsghdr cmsghdr
;
1080 /* We use NAME_MAX space for the SELinux label
1081 * here. The kernel currently enforces no
1082 * limit, but according to suggestions from
1083 * the SELinux people this will change and it
1084 * will probably be identical to NAME_MAX. For
1085 * now we use that, but this should be updated
1086 * one day when the final limit is known. */
1087 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1088 CMSG_SPACE(sizeof(struct timeval
)) +
1089 CMSG_SPACE(sizeof(int)) + /* fd */
1090 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1093 union sockaddr_union sa
= {};
1095 struct msghdr msghdr
= {
1098 .msg_control
= &control
,
1099 .msg_controllen
= sizeof(control
),
1101 .msg_namelen
= sizeof(sa
),
1105 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1107 if (revents
!= EPOLLIN
) {
1108 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1112 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1114 (void) ioctl(fd
, SIOCINQ
, &v
);
1116 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1117 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1119 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1121 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1124 iovec
.iov_base
= s
->buffer
;
1125 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1127 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1129 if (IN_SET(errno
, EINTR
, EAGAIN
))
1132 return log_error_errno(errno
, "recvmsg() failed: %m");
1135 CMSG_FOREACH(cmsg
, &msghdr
) {
1137 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1138 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1139 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1140 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1141 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1142 cmsg
->cmsg_type
== SCM_SECURITY
) {
1143 label
= (char*) CMSG_DATA(cmsg
);
1144 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1145 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1146 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1147 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1148 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1149 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1150 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1151 fds
= (int*) CMSG_DATA(cmsg
);
1152 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1156 /* And a trailing NUL, just in case */
1159 if (fd
== s
->syslog_fd
) {
1160 if (n
> 0 && n_fds
== 0)
1161 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1163 log_warning("Got file descriptors via syslog socket. Ignoring.");
1165 } else if (fd
== s
->native_fd
) {
1166 if (n
> 0 && n_fds
== 0)
1167 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1168 else if (n
== 0 && n_fds
== 1)
1169 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1171 log_warning("Got too many file descriptors via native socket. Ignoring.");
1174 assert(fd
== s
->audit_fd
);
1176 if (n
> 0 && n_fds
== 0)
1177 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1179 log_warning("Got file descriptors via audit socket. Ignoring.");
1182 close_many(fds
, n_fds
);
/* SIGUSR1 handler (registered in setup_signals()): flush request.
 *
 * Logs the requesting PID, flushes the runtime journal to /var without requiring the flag file
 * (server_flush_to_var(s, false)), vacuums quietly, touches /run/systemd/journal/flushed and finally emits the
 * space usage message with a NULL storage argument. A failure to touch the flag file is only logged as a warning.
 *
 * NOTE(review): the "if (r < 0)" guard and the return statement are not visible in this listing. */
1186 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1187 Server
*s
= userdata
;
1192 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1194 (void) server_flush_to_var(s
, false);
1196 server_vacuum(s
, false);
1198 r
= touch("/run/systemd/journal/flushed");
1200 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1202 server_space_usage_message(s
, NULL
);
/* SIGUSR2 handler (registered in setup_signals()): rotation request.
 *
 * Logs the requesting PID, vacuums verbosely, bumps min_use for each storage whose journal is open and records the
 * current CLOCK_MONOTONIC time in /run/systemd/journal/rotated. A failure to write that file is only logged as a
 * warning.
 *
 * NOTE(review): no call to server_rotate() is visible in this listing although the log message announces a
 * rotation; the line is presumably among those missing here. Confirm against the full file. */
1206 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1207 Server
*s
= userdata
;
1212 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1214 server_vacuum(s
, true);
1216 if (s
->system_journal
)
1217 patch_min_use(&s
->system_storage
);
1218 if (s
->runtime_journal
)
1219 patch_min_use(&s
->runtime_storage
);
1221 /* Let clients know when the most recent rotation happened. */
1222 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1224 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Signal callback shared by the SIGTERM and SIGINT event sources (both are registered with
 * this function in setup_signals()). It logs the received signal at LOG_INFO and asks the
 * Server's event loop to exit with code 0. */
1229 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1230 Server
*s
= userdata
;
1234 log_received_signal(LOG_INFO
, si
);
1236 sd_event_exit(s
->event
, 0);
/* Signal callback that setup_signals() registers for SIGRTMIN+1: a request to sync. It logs
 * the PID of the sender at debug level and writes the current CLOCK_MONOTONIC time to
 * /run/systemd/journal/synced, so that clients can find out when the latest sync happened.
 * A failure to write that file is only reported via log_warning_errno() ("ignoring"). */
1240 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1241 Server
*s
= userdata
;
1246 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1250 /* Let clients know when the most recent sync happened. */
1251 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1253 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs the signal handling of the Server on its event loop.
 *
 * The signal mask is set (SIG_SETMASK) to SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1,
 * and one sd-event signal source is added per signal:
 *
 *   SIGUSR1     -> dispatch_sigusr1
 *   SIGUSR2     -> dispatch_sigusr2
 *   SIGTERM     -> dispatch_sigterm    (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGINT      -> dispatch_sigterm    (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGRTMIN+1  -> dispatch_sigrtmin1  (priority SD_EVENT_PRIORITY_NORMAL+15)
 *
 * Each source is stored in the matching s->*_event_source field and gets the Server as
 * userdata. */
1258 static int setup_signals(Server
*s
) {
1263 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1265 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1269 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1273 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1277 /* Let's process SIGTERM late, so that we flush all queued
1278 * messages to disk before we exit */
1279 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1283 /* When journald is invoked on the terminal (when debugging),
1284 * it's useful if C-c is handled equivalent to SIGTERM. */
1285 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1289 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1293 /* SIGRTMIN+1 causes an immediate sync. We process this very
1294 * late, so that everything else queued at this point is
1295 * really written to disk. Clients can watch
1296 * /run/systemd/journal/synced with inotify until its mtime
1297 * changes to see when a sync happened. */
1298 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1302 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init(); it applies
 * "systemd.journald.*" kernel command line switches to the Server settings.
 *
 *   systemd.journald.forward_to_{syslog,kmsg,console,wall}
 *           parsed with parse_boolean(); the result is stored in s->forward_to_*.
 *   systemd.journald.max_level_{console,store,syslog,kmsg,wall}
 *           checked with proc_cmdline_value_missing(), then parsed with
 *           log_level_from_string(); the result is stored in s->max_level_*.
 *
 * Values that cannot be parsed are reported with log_warning() ("Ignoring."), and so is any
 * other key that starts with "systemd.journald".
 *
 * NOTE(review): presumably `data` is the Server passed as userdata by server_init() -- the
 * assignment is not part of this excerpt, confirm. */
1309 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1315 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
/* For the boolean switches a key without a value counts as "true". */
1317 r
= value
? parse_boolean(value
) : true;
1319 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1321 s
->forward_to_syslog
= r
;
1323 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1325 r
= value
? parse_boolean(value
) : true;
1327 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1329 s
->forward_to_kmsg
= r
;
1331 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1333 r
= value
? parse_boolean(value
) : true;
1335 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1337 s
->forward_to_console
= r
;
1339 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1341 r
= value
? parse_boolean(value
) : true;
1343 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1345 s
->forward_to_wall
= r
;
1347 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1349 if (proc_cmdline_value_missing(key
, value
))
1352 r
= log_level_from_string(value
);
1354 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1356 s
->max_level_console
= r
;
1358 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1360 if (proc_cmdline_value_missing(key
, value
))
1363 r
= log_level_from_string(value
);
1365 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1367 s
->max_level_store
= r
;
1369 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1371 if (proc_cmdline_value_missing(key
, value
))
1374 r
= log_level_from_string(value
);
1376 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1378 s
->max_level_syslog
= r
;
1380 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1382 if (proc_cmdline_value_missing(key
, value
))
1385 r
= log_level_from_string(value
);
1387 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1389 s
->max_level_kmsg
= r
;
1391 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1393 if (proc_cmdline_value_missing(key
, value
))
1396 r
= log_level_from_string(value
);
1398 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1400 s
->max_level_wall
= r
;
1402 } else if (startswith(key
, "systemd.journald"))
1403 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1405 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server: parses PKGSYSCONFDIR "/journald.conf"
 * together with the "systemd/journald.conf.d" drop-in directories (CONF_PATHS_NULSTR) through
 * config_parse_many_nulstr(), looking settings up via journald_gperf_lookup, with
 * CONFIG_PARSE_WARN set. The result of config_parse_many_nulstr() is returned unchanged. */
1409 static int server_parse_config_file(Server
*s
) {
1412 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1413 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1415 config_item_perf_lookup
, journald_gperf_lookup
,
1416 CONFIG_PARSE_WARN
, s
);
/* Time event callback that server_schedule_sync() registers through sd_event_add_time();
 * userdata is the Server that scheduled the sync. */
1419 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1420 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk, depending on the priority of the message
 * that triggered the call.
 *
 * Messages of priority LOG_CRIT or more severe (numerically <= LOG_CRIT) take the immediate
 * path described by the comment below. For everything else the function looks at
 * s->sync_scheduled and, if s->sync_interval_usec is non-zero, arms the sync timer at "now"
 * on CLOCK_MONOTONIC (as reported by sd_event_now()) plus s->sync_interval_usec. The timer
 * source is created on first use with server_dispatch_sync() as callback and priority
 * SD_EVENT_PRIORITY_IMPORTANT; an existing source is re-timed and re-enabled as
 * SD_EVENT_ONESHOT. Afterwards s->sync_scheduled is set to true. */
1428 int server_schedule_sync(Server
*s
, int priority
) {
1433 if (priority
<= LOG_CRIT
) {
1434 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1439 if (s
->sync_scheduled
)
1442 if (s
->sync_interval_usec
> 0) {
1445 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1449 when
+= s
->sync_interval_usec
;
1451 if (!s
->sync_event_source
) {
1452 r
= sd_event_add_time(
1454 &s
->sync_event_source
,
1457 server_dispatch_sync
, s
);
1461 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1463 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1467 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1472 s
->sync_scheduled
= true;
/* IO event callback that server_open_hostname() attaches to the hostname file descriptor.
 * It refreshes the hostname cached in the Server by calling server_cache_hostname(). */
1478 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1479 Server
*s
= userdata
;
1483 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking, no controlling
 * tty) into s->hostname_fd and registers it with the event loop, with an event mask of 0 and
 * dispatch_hostname_change() as callback, so that hostname changes can be picked up.
 *
 * A failure to open the file is logged and returned via log_error_errno(errno, ...). For the
 * registration there are two visible outcomes: one path only warns ("ignoring") and closes
 * the fd again with safe_close(), the other one returns the error. On success the event
 * source priority is set to SD_EVENT_PRIORITY_IMPORTANT-10. */
1487 static int server_open_hostname(Server
*s
) {
1492 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1493 if (s
->hostname_fd
< 0)
1494 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1496 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1498 /* kernels prior to 3.2 don't support polling this file. Ignore
1501 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1502 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1506 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1509 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1511 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* IO event callback for the notification socket; server_connect_notify() registers it for
 * EPOLLOUT on s->notify_fd.
 *
 * Every invocation sends at most one message, using send() with MSG_DONTWAIT so that the
 * daemon never blocks on the receiver. The candidates are tried in this order:
 *
 *   1. the initial ready/status message, as long as s->sent_notify_ready is false,
 *   2. the watchdog keep-alive, if s->send_watchdog is set,
 *   3. one entry of s->stdout_streams_notify_queue via stdout_stream_send_notify().
 *
 * EAGAIN from send() is checked for separately from other errors, which are logged and
 * returned through log_error_errno(). Once neither a watchdog message nor a queued stream
 * notification is pending, the event source is switched to SD_EVENT_OFF. */
1516 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1517 Server
*s
= userdata
;
1521 assert(s
->notify_event_source
== es
);
1522 assert(s
->notify_fd
== fd
);
1524 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1525 * message on it. Either it's the watchdog event, the initial
1526 * READY=1 event or an stdout stream event. If there's nothing
1527 * to write anymore, turn our event source off. The next time
1528 * there's something to send it will be turned on again. */
1530 if (!s
->sent_notify_ready
) {
1531 static const char p
[] =
1533 "STATUS=Processing requests...";
1536 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1538 if (errno
== EAGAIN
)
1541 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1544 s
->sent_notify_ready
= true;
1545 log_debug("Sent READY=1 notification.");
1547 } else if (s
->send_watchdog
) {
1549 static const char p
[] =
1554 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1556 if (errno
== EAGAIN
)
1559 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1562 s
->send_watchdog
= false;
1563 log_debug("Sent WATCHDOG=1 notification.");
1565 } else if (s
->stdout_streams_notify_queue
)
1566 /* Dispatch one stream notification event */
1567 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1569 /* Leave us enabled if there's still more to do. */
1570 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1573 /* There was nothing to do anymore, let's turn ourselves off. */
1574 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1576 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Time event callback of s->watchdog_event_source (added in server_connect_notify()).
 *
 * It does not send anything itself: it sets s->send_watchdog and switches the notify IO
 * source on, so that dispatch_notify_event() sends the keep-alive once the socket is
 * writable. A failure to enable the notify source is only warned about. The timer is then
 * moved to usec + s->watchdog_usec / 2 and enabled again; failures of these two steps are
 * logged and returned via log_error_errno(). */
1581 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1582 Server
*s
= userdata
;
1587 s
->send_watchdog
= true;
1589 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1591 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1593 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1595 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1597 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1599 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects the Server to the service manager's notification socket without ever blocking on
 * it (see the long explanation inside the function for the reason).
 *
 * The socket address is taken from $NOTIFY_SOCKET. The value has to start with '@' or '/'
 * and consist of more than that one character, and it must not be longer than
 * sizeof(sa.un.sun_path); otherwise an error is logged. A leading '@' is replaced by a NUL
 * byte before connect() (abstract socket address, see unix(7)).
 *
 * The function then creates a non-blocking, close-on-exec AF_UNIX datagram socket in
 * s->notify_fd, tries to enlarge its send buffer to NOTIFY_SNDBUF_SIZE (result ignored),
 * connects it, and registers it for EPOLLOUT with dispatch_notify_event() as callback.
 * If sd_watchdog_enabled() reports an active watchdog, s->send_watchdog is set and a
 * CLOCK_MONOTONIC time event with dispatch_watchdog() as callback is added, first due at
 * now + s->watchdog_usec/2, with s->watchdog_usec/4 passed as accuracy.
 *
 * Failures of socket(), connect() and the sd_event_add_*() calls are logged and returned via
 * log_error_errno().
 *
 * NOTE(review): a value of exactly sizeof(sa.un.sun_path) bytes passes the length check, and
 * strncpy() then leaves sun_path without a terminating NUL. Presumably SOCKADDR_UN_LEN()
 * copes with an unterminated path -- confirm. */
1604 static int server_connect_notify(Server
*s
) {
1605 union sockaddr_union sa
= {
1606 .un
.sun_family
= AF_UNIX
,
1612 assert(s
->notify_fd
< 0);
1613 assert(!s
->notify_event_source
);
1616 So here's the problem: we'd like to send notification
1617 messages to PID 1, but we cannot do that via sd_notify(),
1618 since that's synchronous, and we might end up blocking on
1619 it. Specifically: given that PID 1 might block on
1620 dbus-daemon during IPC, and dbus-daemon is logging to us,
1621 and might hence block on us, we might end up in a deadlock
1622 if we block on sending PID 1 notification messages — by
1623 generating a full blocking circle. To avoid this, let's
1624 create a non-blocking socket, and connect it to the
1625 notification socket, and then wait for POLLOUT before we
1626 send anything. This should efficiently avoid any deadlocks,
1627 as we'll never block on PID 1, hence PID 1 can safely block
1628 on dbus-daemon which can safely block on us again.
1630 Don't think that this issue is real? It is, see:
1631 https://github.com/systemd/systemd/issues/1505
1634 e
= getenv("NOTIFY_SOCKET");
1638 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1639 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1643 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1644 log_error("NOTIFY_SOCKET path too long: %s", e
);
1648 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1649 if (s
->notify_fd
< 0)
1650 return log_error_errno(errno
, "Failed to create notify socket: %m");
1652 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1654 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1655 if (sa
.un
.sun_path
[0] == '@')
1656 sa
.un
.sun_path
[0] = 0;
1658 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1660 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1662 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1664 return log_error_errno(r
, "Failed to watch notification socket: %m");
1666 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1667 s
->send_watchdog
= true;
1669 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1671 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1674 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server object and brings up everything journald needs before it can enter
 * its event loop. The visible steps, in order:
 *
 *   1. Mark all file descriptors as unset (-1) and fill in the built-in defaults (sync
 *      interval, rate limit, forwarding, max levels, line_max, storage metrics).
 *   2. Overlay the configuration file (server_parse_config_file(), result ignored) and the
 *      kernel command line (proc_cmdline_parse() with parse_proc_cmdline_item()).
 *   3. Normalize the rate limit settings, create /run/systemd/journal, and allocate the user
 *      journal map, the mmap cache, the deferred-close set and the default event loop.
 *   4. Take over the file descriptors reported by sd_listen_fds(), classifying each one as
 *      native, stdout, syslog or audit socket; a second socket of the same kind is an error.
 *      Descriptors that match none of these are collected in an FDSet, offered to
 *      server_restore_streams(), and whatever is left over is closed with a warning.
 *   5. Open the stdout, syslog and native sockets, /dev/kmsg, the audit socket (only if one
 *      was passed in or no sockets were passed at all), the kernel seqnum and the hostname
 *      file, then set up signal handling, udev, the rate limiter and the cgroup root path.
 *   6. Cache hostname, boot ID and machine ID, and derive the runtime and system storage
 *      paths below /run/log/journal/ and /var/log/journal/ from the machine ID.
 *   7. Connect to the notification socket and acquire the default client context (both
 *      best-effort), and finally return the result of system_journal_open(s, false). */
1680 int server_init(Server
*s
) {
1681 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1688 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1691 s
->read_kmsg
= true;
1693 s
->watchdog_usec
= USEC_INFINITY
;
1695 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1696 s
->sync_scheduled
= false;
1698 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1699 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1701 s
->forward_to_wall
= true;
1703 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1705 s
->max_level_store
= LOG_DEBUG
;
1706 s
->max_level_syslog
= LOG_DEBUG
;
1707 s
->max_level_kmsg
= LOG_NOTICE
;
1708 s
->max_level_console
= LOG_INFO
;
1709 s
->max_level_wall
= LOG_EMERG
;
1711 s
->line_max
= DEFAULT_LINE_MAX
;
1713 journal_reset_metrics(&s
->system_storage
.metrics
);
1714 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1716 server_parse_config_file(s
);
1718 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1720 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* Exactly one of interval and burst is zero: normalize by setting both to zero. */
1722 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1723 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1724 s
->rate_limit_interval
, s
->rate_limit_burst
);
1725 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1728 (void) mkdir_p("/run/systemd/journal", 0755);
1730 s
->user_journals
= ordered_hashmap_new(NULL
);
1731 if (!s
->user_journals
)
1734 s
->mmap
= mmap_cache_new();
1738 s
->deferred_closes
= set_new(NULL
);
1739 if (!s
->deferred_closes
)
1742 r
= sd_event_default(&s
->event
);
1744 return log_error_errno(r
, "Failed to create event loop: %m");
1746 n
= sd_listen_fds(true);
1748 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify every passed-in descriptor by socket type and bound path. */
1750 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1752 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1754 if (s
->native_fd
>= 0) {
1755 log_error("Too many native sockets passed.");
1761 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1763 if (s
->stdout_fd
>= 0) {
1764 log_error("Too many stdout sockets passed.");
1770 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1771 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1773 if (s
->syslog_fd
>= 0) {
1774 log_error("Too many /dev/log sockets passed.");
1780 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1782 if (s
->audit_fd
>= 0) {
1783 log_error("Too many audit sockets passed.");
1797 r
= fdset_put(fds
, fd
);
1803 /* Try to restore streams, but don't bother if this fails */
1804 (void) server_restore_streams(s
, fds
);
1806 if (fdset_size(fds
) > 0) {
1807 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1808 fds
= fdset_free(fds
);
/* Remember whether not a single socket was handed to us; this decides about audit below. */
1811 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1813 /* always open stdout, syslog, native, and kmsg sockets */
1815 /* systemd-journald.socket: /run/systemd/journal/stdout */
1816 r
= server_open_stdout_socket(s
);
1820 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1821 r
= server_open_syslog_socket(s
);
1825 /* systemd-journald.socket: /run/systemd/journal/socket */
1826 r
= server_open_native_socket(s
);
1831 r
= server_open_dev_kmsg(s
);
1835 /* Unless we got *some* sockets and not audit, open audit socket */
1836 if (s
->audit_fd
>= 0 || no_sockets
) {
1837 r
= server_open_audit(s
);
1842 r
= server_open_kernel_seqnum(s
);
1846 r
= server_open_hostname(s
);
1850 r
= setup_signals(s
);
1854 s
->udev
= udev_new();
1858 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1862 r
= cg_get_root_path(&s
->cgroup_root
);
1866 server_cache_hostname(s
);
1867 server_cache_boot_id(s
);
1868 server_cache_machine_id(s
);
1870 s
->runtime_storage
.name
= "Runtime journal";
1871 s
->system_storage
.name
= "System journal";
1873 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1874 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1875 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1878 (void) server_connect_notify(s
);
1880 (void) client_context_acquire_default(s
);
1882 return system_journal_open(s
, false);
/* Takes one CLOCK_REALTIME timestamp and passes it to journal_file_maybe_append_tag() for
 * the system journal (if it is open) and for every file in s->user_journals. */
1885 void server_maybe_append_tags(Server
*s
) {
1891 n
= now(CLOCK_REALTIME
);
1893 if (s
->system_journal
)
1894 journal_file_maybe_append_tag(s
->system_journal
, n
);
1896 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1897 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server owns. In order: the set of deferred journal file closes, all
 * stdout streams, the client contexts, the system, runtime and per-user journal files, every
 * event source together with the event loop, all socket and file descriptors, the rate
 * limiter, the kernel sequence number mapping, the heap-allocated strings (cgroup root,
 * hostname field, storage paths), the mmap cache and the udev context. */
1901 void server_done(Server
*s
) {
1905 if (s
->deferred_closes
) {
1906 journal_file_close_set(s
->deferred_closes
);
1907 set_free(s
->deferred_closes
);
/* Keep freeing the head of s->stdout_streams until the list is empty. */
1910 while (s
->stdout_streams
)
1911 stdout_stream_free(s
->stdout_streams
);
1913 client_context_flush_all(s
);
1915 if (s
->system_journal
)
1916 (void) journal_file_close(s
->system_journal
);
1918 if (s
->runtime_journal
)
1919 (void) journal_file_close(s
->runtime_journal
);
1921 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1922 (void) journal_file_close(f
);
1924 ordered_hashmap_free(s
->user_journals
);
1926 sd_event_source_unref(s
->syslog_event_source
);
1927 sd_event_source_unref(s
->native_event_source
);
1928 sd_event_source_unref(s
->stdout_event_source
);
1929 sd_event_source_unref(s
->dev_kmsg_event_source
);
1930 sd_event_source_unref(s
->audit_event_source
);
1931 sd_event_source_unref(s
->sync_event_source
);
1932 sd_event_source_unref(s
->sigusr1_event_source
);
1933 sd_event_source_unref(s
->sigusr2_event_source
);
1934 sd_event_source_unref(s
->sigterm_event_source
);
1935 sd_event_source_unref(s
->sigint_event_source
);
1936 sd_event_source_unref(s
->sigrtmin1_event_source
);
1937 sd_event_source_unref(s
->hostname_event_source
);
1938 sd_event_source_unref(s
->notify_event_source
);
1939 sd_event_source_unref(s
->watchdog_event_source
);
1940 sd_event_unref(s
->event
);
1942 safe_close(s
->syslog_fd
);
1943 safe_close(s
->native_fd
);
1944 safe_close(s
->stdout_fd
);
1945 safe_close(s
->dev_kmsg_fd
);
1946 safe_close(s
->audit_fd
);
1947 safe_close(s
->hostname_fd
);
1948 safe_close(s
->notify_fd
);
1951 journal_rate_limit_free(s
->rate_limit
);
/* The kernel seqnum mapping is unmapped with a length of sizeof(uint64_t). */
1953 if (s
->kernel_seqnum
)
1954 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1958 free(s
->cgroup_root
);
1959 free(s
->hostname_field
);
1960 free(s
->runtime_storage
.path
);
1961 free(s
->system_storage
.path
);
1964 mmap_cache_unref(s
->mmap
);
1966 udev_unref(s
->udev
);
/* Maps the Storage enum values to the strings "auto", "volatile", "persistent" and "none".
 * DEFINE_STRING_TABLE_LOOKUP() and DEFINE_CONFIG_PARSE_ENUM() (defined elsewhere) are
 * instantiated on top of this table; the latter is given the name config_parse_storage and
 * the message "Failed to parse storage setting". */
1969 static const char* const storage_table
[_STORAGE_MAX
] = {
1970 [STORAGE_AUTO
] = "auto",
1971 [STORAGE_VOLATILE
] = "volatile",
1972 [STORAGE_PERSISTENT
] = "persistent",
1973 [STORAGE_NONE
] = "none"
1976 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1977 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Maps the SplitMode enum values to the strings "login", "uid" and "none".
 * DEFINE_STRING_TABLE_LOOKUP() and DEFINE_CONFIG_PARSE_ENUM() (defined elsewhere) are
 * instantiated on top of this table; the latter is given the name config_parse_split_mode
 * and the message "Failed to parse split mode setting". */
1979 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1980 [SPLIT_LOGIN
] = "login",
1981 [SPLIT_UID
] = "uid",
1982 [SPLIT_NONE
] = "none",
1985 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1986 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
1988 int config_parse_line_max(
1990 const char *filename
,
1992 const char *section
,
1993 unsigned section_line
,
2008 if (isempty(rvalue
))
2009 /* Empty assignment means default */
2010 *sz
= DEFAULT_LINE_MAX
;
2014 r
= parse_size(rvalue
, 1024, &v
);
2016 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2021 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2022 * terminal size is 80ch, and it might make sense to break one character before the natural
2023 * line break would occur on that. */
2024 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2026 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2027 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2028 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2029 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2030 * fail much earlier anyway. */
2031 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);