2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "format-util.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-context.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
64 #include "parse-util.h"
65 #include "proc-cmdline.h"
66 #include "process-util.h"
68 #include "selinux-util.h"
69 #include "signal-util.h"
70 #include "socket-util.h"
71 #include "stdio-util.h"
72 #include "string-table.h"
73 #include "string-util.h"
74 #include "syslog-util.h"
75 #include "user-util.h"
77 #define USER_JOURNALS_MAX 1024
79 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
80 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
81 #define DEFAULT_RATE_LIMIT_BURST 1000
82 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
84 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
86 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
88 /* The period to insert between posting changes for coalescing */
89 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
/* Measures the journal files stored below 'path'.
 *
 * *ret_free receives the free space of the backing file system as reported by
 * fstatvfs() (f_bsize * f_bavail). *ret_used is increased by st_blocks * 512
 * for every regular directory entry whose name ends in ".journal" or
 * ".journal~".
 *
 * Failing to open the directory is logged at debug level for ENOENT and at
 * error level otherwise; a failing fstatvfs() is logged as an error. Entries
 * that cannot be stat'ed are only logged at debug level ("ignoring"). */
91 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
92 _cleanup_closedir_
DIR *d
= NULL
;
101 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
102 errno
, "Failed to open %s: %m", path
);
104 if (fstatvfs(dirfd(d
), &ss
) < 0)
105 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
107 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
109 FOREACH_DIRENT_ALL(de
, d
, break) {
/* Only journal files (active or archived with a trailing '~') are of interest */
112 if (!endswith(de
->d_name
, ".journal") &&
113 !endswith(de
->d_name
, ".journal~"))
/* Look at the entry itself, do not follow symlinks */
116 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
117 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
121 if (!S_ISREG(st
.st_mode
))
/* Account the allocated blocks, converted to bytes by multiplying with 512 */
124 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
/* Drops the cached space information by zeroing the whole JournalStorageSpace
 * structure. This also resets its timestamp member to 0. */
130 static void cache_space_invalidate(JournalStorageSpace
*space
) {
131 memset(space
, 0, sizeof(*space
));
/* Recomputes the cached space figures kept in storage->space.
 *
 * The current CLOCK_MONOTONIC time is compared against the cached timestamp
 * plus RECHECK_SPACE_USEC (a zero timestamp never matches that check).
 * NOTE(review): presumably the function returns early while the cache is
 * still fresh; confirm against the full source.
 *
 * After determine_path_usage() ran on storage->path, the following members
 * are written: vfs_used, vfs_available, limit, available and timestamp. */
134 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
135 JournalStorageSpace
*space
;
136 JournalMetrics
*metrics
;
137 uint64_t vfs_used
, vfs_avail
, avail
;
143 metrics
= &storage
->metrics
;
144 space
= &storage
->space
;
146 ts
= now(CLOCK_MONOTONIC
);
148 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
151 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
155 space
->vfs_used
= vfs_used
;
156 space
->vfs_available
= vfs_avail
;
/* What the file system offers, reduced by the configured keep_free reserve */
158 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
/* The limit is "used + avail", raised to at least min_use and capped at max_use */
160 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
161 space
->available
= LESS_BY(space
->limit
, vfs_used
);
162 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to the cached storage->space.vfs_used value
 * if that is larger; min_use is never lowered here. */
166 static void patch_min_use(JournalStorage
*storage
) {
169 /* Let's bump the min_use limit to the current usage on disk. We do
170 * this when starting up and first opening the journal files. This way
171 * sudden spikes in disk usage will not cause journald to vacuum files
172 * without bounds. Note that this means that only a restart of journald
173 * will make it reset this value. */
175 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Reports the cached space figures of the storage currently written to.
 *
 * The system storage is used if s->system_journal is set, the runtime storage
 * otherwise. The cache is refreshed through cache_space_refresh() first, then
 * space.available and space.limit are copied to *available and *limit. */
179 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
185 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
187 r
= cache_space_refresh(s
, js
);
190 *available
= js
->space
.available
;
192 *limit
= js
->space
.limit
;
/* Emits a driver message (MESSAGE_ID SD_MESSAGE_JOURNAL_USAGE_STR) describing
 * the disk usage of a journal storage.
 *
 * The storage can be picked from the server: the system storage if
 * s->system_journal is set, else the runtime storage.
 * NOTE(review): presumably this selection only happens when the caller passed
 * storage == NULL (dispatch_sigusr1() does so); confirm.
 *
 * The space cache is refreshed first. The message carries the raw and the
 * human-readable (format_bytes()) values of: current use, max_use, keep_free,
 * file system space available, effective limit and remaining available
 * space. */
197 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
198 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
199 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
200 JournalMetrics
*metrics
;
205 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
207 if (cache_space_refresh(s
, storage
) < 0)
210 metrics
= &storage
->metrics
;
211 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
212 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
213 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
214 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
215 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
216 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
218 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
219 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
220 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
221 "JOURNAL_NAME=%s", storage
->name
,
222 "JOURNAL_PATH=%s", storage
->path
,
223 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
224 "CURRENT_USE_PRETTY=%s", fb1
,
225 "MAX_USE=%"PRIu64
, metrics
->max_use
,
226 "MAX_USE_PRETTY=%s", fb2
,
227 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
228 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
229 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
230 "DISK_AVAILABLE_PRETTY=%s", fb4
,
231 "LIMIT=%"PRIu64
, storage
->space
.limit
,
232 "LIMIT_PRETTY=%s", fb5
,
233 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
234 "AVAILABLE_PRETTY=%s", fb6
,
/* Sets ACLs for 'uid' on the journal file's fd through add_acls_for_user().
 * A failure is not fatal: it is only logged as a warning ("ignoring").
 *
 * UIDs up to SYSTEM_UID_MAX are special-cased.
 * NOTE(review): presumably no ACL is added for them; confirm. */
238 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
245 if (uid
<= SYSTEM_UID_MAX
)
248 r
= add_acls_for_user(f
->fd
, uid
);
250 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Opens a journal file with mode 0640, using the server's compress, mmap and
 * deferred_closes settings and the given metrics and seal flag.
 *
 * Two open variants are visible: journal_file_open_reliably() and plain
 * journal_file_open().
 * NOTE(review): which one is used presumably depends on a parameter that is
 * not visible here; confirm.
 *
 * Afterwards the post-change timer is enabled on the server's event loop with
 * POST_CHANGE_TIMER_INTERVAL_USEC; a journal_file_close() call follows it.
 * NOTE(review): presumably that close is the cleanup for a failed timer
 * setup; confirm. */
254 static int open_journal(
260 JournalMetrics
*metrics
,
270 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
272 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
276 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
278 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed exists, i.e. if
 * access() with F_OK succeeds on it. */
286 static bool flushed_flag_is_set(void) {
287 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
/* Opens the system journal and/or the runtime journal, as far as they are not
 * open yet.
 *
 * System journal: only attempted if it is not open, the storage mode is
 * STORAGE_PERSISTENT or STORAGE_AUTO, and either 'flush_requested' is true or
 * the "flushed" flag file exists. In persistent mode /var/log/journal/ is
 * created first; the storage directory itself is always mkdir()ed (errors
 * ignored). The file is <system_storage.path>/system.journal, opened with
 * O_RDWR|O_CREAT. The visible follow-up calls add ACLs, refresh the space
 * cache and patch min_use; open errors other than ENOENT/EROFS are logged as
 * warnings. If no flush was requested, server_flush_to_var(s, true) is
 * called, see the comment in the body.
 *
 * Runtime journal: only attempted if it is not open and storage is not
 * STORAGE_NONE. With a system journal present it is opened without O_CREAT
 * (only to be flushed); otherwise the /run/log directories are created and
 * the file is opened with O_RDWR|O_CREAT, and a failure is returned via
 * log_error_errno(). A runtime journal that is open at the end gets ACLs, a
 * refreshed space cache and a patched min_use as well. */
290 static int system_journal_open(Server
*s
, bool flush_requested
) {
294 if (!s
->system_journal
&&
295 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
296 (flush_requested
|| flushed_flag_is_set())) {
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
304 if (s
->storage
== STORAGE_PERSISTENT
)
305 (void) mkdir_p("/var/log/journal/", 0755);
307 (void) mkdir(s
->system_storage
.path
, 0755);
309 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
310 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
312 server_add_acls(s
->system_journal
, 0);
313 (void) cache_space_refresh(s
, &s
->system_storage
);
314 patch_min_use(&s
->system_storage
);
316 if (r
!= -ENOENT
&& r
!= -EROFS
)
317 log_warning_errno(r
, "Failed to open system journal: %m");
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
329 if (!flush_requested
)
330 (void) server_flush_to_var(s
, true);
333 if (!s
->runtime_journal
&&
334 (s
->storage
!= STORAGE_NONE
)) {
336 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
338 if (s
->system_journal
) {
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
344 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
347 log_warning_errno(r
, "Failed to open runtime journal: %m");
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn
, 0750);
361 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
363 return log_error_errno(r
, "Failed to open runtime journal: %m");
366 if (s
->runtime_journal
) {
367 server_add_acls(s
->runtime_journal
, 0);
368 (void) cache_space_refresh(s
, &s
->runtime_storage
);
369 patch_min_use(&s
->runtime_storage
);
/* Returns the journal file that entries of 'uid' shall be written to.
 *
 * Order of decisions, as far as visible:
 *  1. system_journal_open(s, false) is retried (result ignored).
 *  2. An open runtime journal always wins.
 *  3. UIDs up to SYSTEM_UID_MAX and dynamic UIDs use the system journal.
 *  4. Otherwise s->user_journals is consulted for the UID. For a new user the
 *     file /var/log/journal/<machine-id>/user-<uid>.journal is opened with
 *     O_RDWR|O_CREAT, gets ACLs for the UID and is stored in the hashmap.
 *     While the map holds USER_JOURNALS_MAX or more entries, the first entry
 *     is removed and closed beforehand.
 *
 * The fallback returns visible in the body (e.g. after a failed asprintf())
 * hand out s->system_journal, which may be NULL. */
376 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
377 _cleanup_free_
char *p
= NULL
;
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s
, false);
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
400 if (s
->runtime_journal
)
401 return s
->runtime_journal
;
403 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
404 return s
->system_journal
;
406 r
= sd_id128_get_machine(&machine
);
408 return s
->system_journal
;
410 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
414 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
415 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
416 return s
->system_journal
;
418 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
419 /* Too many open? Then let's close one */
420 f
= ordered_hashmap_steal_first(s
->user_journals
);
422 (void) journal_file_close(f
);
425 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
427 return s
->system_journal
;
429 server_add_acls(f
, uid
);
431 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
433 (void) journal_file_close(f
);
434 return s
->system_journal
;
/* Rotates a single journal file through journal_file_rotate(), passing the
 * server's compress setting, the seal flag and the deferred_closes set.
 *
 * Two error messages are visible, one naming the path of *f ("Failed to
 * rotate"), one naming only the journal kind ("Failed to create new ...
 * journal").
 * NOTE(review): presumably the second is used when *f is NULL after the
 * failed rotation; confirm.
 *
 * server_add_acls() is applied to the rotated file for 'uid'. */
440 static int do_rotate(
453 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
456 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
458 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
460 server_add_acls(*f
, uid
);
/* Rotates all journals of the server.
 *
 * The runtime journal (never sealed) and the system journal are rotated with
 * their results ignored. Every entry of s->user_journals is rotated with the
 * UID taken from the hashmap key; depending on the outcome the entry is either
 * replaced by the new file or removed from the map.
 *
 * Finally all files queued in s->deferred_closes that are no longer offlining
 * are removed from the set and closed. */
465 void server_rotate(Server
*s
) {
471 log_debug("Rotating...");
473 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
474 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
476 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
477 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
479 ordered_hashmap_replace(s
->user_journals
, k
, f
);
481 /* Old file has been closed and deallocated */
482 ordered_hashmap_remove(s
->user_journals
, k
);
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f
, s
->deferred_closes
, i
)
487 if (!journal_file_is_offlining(f
)) {
488 (void) set_remove(s
->deferred_closes
, f
);
489 (void) journal_file_close(f
);
/* Syncs the journals to disk by calling journal_file_set_offline(f, false) on
 * the system journal (if open) and on every user journal. Failures are logged
 * as warnings and otherwise ignored.
 *
 * Afterwards the sync timer event source, if any, is switched to SD_EVENT_OFF
 * and s->sync_scheduled is cleared. */
493 void server_sync(Server
*s
) {
498 if (s
->system_journal
) {
499 r
= journal_file_set_offline(s
->system_journal
, false);
501 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
504 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
505 r
= journal_file_set_offline(f
, false);
507 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
510 if (s
->sync_event_source
) {
511 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
513 log_error_errno(r
, "Failed to disable sync timer source: %m");
516 s
->sync_scheduled
= false;
/* Vacuums one journal storage directory.
 *
 * The space cache is refreshed so that storage->space.limit is current, then
 * journal_directory_vacuum() is run with that limit, the storage's
 * n_max_files and the server's max_retention_usec; it also receives
 * &s->oldest_file_usec. Errors other than ENOENT are logged as warnings.
 *
 * A usage message is emitted via server_space_usage_message().
 * NOTE(review): presumably only when 'verbose' is set; confirm.
 *
 * The space cache is invalidated at the end, since vacuuming may have changed
 * the disk usage. */
519 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
526 (void) cache_space_refresh(s
, storage
);
529 server_space_usage_message(s
, storage
);
531 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
532 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
533 &s
->oldest_file_usec
, verbose
);
534 if (r
< 0 && r
!= -ENOENT
)
535 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
537 cache_space_invalidate(&storage
->space
);
/* Vacuums every storage that currently has an open journal: the system
 * storage if s->system_journal is set and the runtime storage if
 * s->runtime_journal is set. s->oldest_file_usec is reset to 0 beforehand. */
540 int server_vacuum(Server
*s
, bool verbose
) {
543 log_debug("Vacuuming...");
545 s
->oldest_file_usec
= 0;
547 if (s
->system_journal
)
548 do_vacuum(s
, &s
->system_storage
, verbose
);
549 if (s
->runtime_journal
)
550 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Caches the machine ID as a ready-made journal field: writes the prefix
 * "_MACHINE_ID=" into s->machine_id_field and formats the ID obtained from
 * sd_id128_get_machine() right behind it. */
555 static void server_cache_machine_id(Server
*s
) {
561 r
= sd_id128_get_machine(&id
);
565 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Caches the boot ID as a ready-made journal field: writes the prefix
 * "_BOOT_ID=" into s->boot_id_field and formats the ID obtained from
 * sd_id128_get_boot() right behind it. */
568 static void server_cache_boot_id(Server
*s
) {
574 r
= sd_id128_get_boot(&id
);
578 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Caches the hostname as a ready-made journal field: the result of
 * gethostname_malloc() is appended to "_HOSTNAME=" and the new string
 * replaces s->hostname_field (the previous value is freed). The temporary
 * hostname string is released automatically (_cleanup_free_). */
581 static void server_cache_hostname(Server
*s
) {
582 _cleanup_free_
char *t
= NULL
;
587 t
= gethostname_malloc();
591 x
= strappend("_HOSTNAME=", t
);
595 free(s
->hostname_field
);
596 s
->hostname_field
= x
;
/* Classifies the (negative errno) result 'r' of a failed journal write and
 * logs the reason for which the file 'f' gets rotated: allocation limits,
 * I/O errors, foreign machine, unclean shutdown, unsupported features,
 * corruption, deleted file or a file from the future.
 *
 * Callers in this file treat a true result as "rotate/vacuum and retry the
 * write" and a false result as a final failure.
 * NOTE(review): the return statements are not visible here; confirm which
 * codes yield false. */
599 static bool shall_try_append_again(JournalFile
*f
, int r
) {
602 case -E2BIG
: /* Hit configured limit */
603 case -EFBIG
: /* Hit fs limit */
604 case -EDQUOT
: /* Quota limit hit */
605 case -ENOSPC
: /* Disk full */
606 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
609 case -EIO
: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f
->path
);
613 case -EHOSTDOWN
: /* Other machine */
614 log_info("%s: Journal file from other machine, rotating.", f
->path
);
617 case -EBUSY
: /* Unclean shutdown */
618 log_info("%s: Unclean shutdown, rotating.", f
->path
);
621 case -EPROTONOSUPPORT
: /* Unsupported feature */
622 log_info("%s: Unsupported feature, rotating.", f
->path
);
625 case -EBADMSG
: /* Corrupted */
626 case -ENODATA
: /* Truncated */
627 case -ESHUTDOWN
: /* Already archived */
628 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
631 case -EIDRM
: /* Journal file has been deleted */
632 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
635 case -ETXTBSY
: /* Journal file is from the future */
636 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
/* Appends one entry, made up of the 'n' fields in 'iovec', to the journal
 * responsible for 'uid' (see find_journal()).
 *
 * Timestamps come from the event loop (sd_event_now()) rather than from the
 * message. A rotation is triggered when the realtime clock went backwards
 * compared to s->last_realtime_clock or when
 * journal_file_rotate_suggested() says so; after rotating, the server is
 * vacuumed and the journal is looked up again.
 *
 * If the append fails and shall_try_append_again() agrees (and no vacuum was
 * done yet), the server is vacuumed, the journal looked up again and the
 * write retried once. Final failures are logged and the entry is dropped.
 * After a successful write a sync is scheduled according to 'priority'. */
644 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
645 bool vacuumed
= false, rotate
= false;
646 struct dual_timestamp ts
;
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
658 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
660 if (ts
.realtime
< s
->last_realtime_clock
) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
666 log_debug("Time jumped backwards, rotating.");
670 f
= find_journal(s
, uid
);
674 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
682 server_vacuum(s
, false);
685 f
= find_journal(s
, uid
);
690 s
->last_realtime_clock
= ts
.realtime
;
692 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
694 server_schedule_sync(s
, priority
);
698 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
699 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
704 server_vacuum(s
, false);
706 f
= find_journal(s
, uid
);
710 log_debug("Retrying write.");
711 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
713 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
715 server_schedule_sync(s
, priority
);
/* Helper macros that append one "FIELD=value" string to iovec[] and advance n.
 * All of them take the bare field name in 'field' and add the "=" separator
 * themselves (field "="), so callers must not include it. The string is
 * allocated with newa()/strjoina() and referenced, not copied, by
 * IOVEC_SET_STRING().
 *
 *   IOVEC_ADD_NUMERIC_FIELD: adds the field if isset(value) holds, formatting
 *                            the value with 'format'; sized via
 *                            DECIMAL_STR_MAX(type)
 *   IOVEC_ADD_STRING_FIELD:  adds the field if the string is not empty
 *   IOVEC_ADD_ID128_FIELD:   adds the field if the 128-bit ID is not null
 *   IOVEC_ADD_SIZED_FIELD:   adds the field if value_size > 0, copying
 *                            exactly value_size bytes and NUL-terminating */
718 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
719 if (isset(value)) { \
721 k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
722 sprintf(k, field "=" format, value); \
723 IOVEC_SET_STRING(iovec[n++], k); \
726 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
727 if (!isempty(value)) { \
729 k = strjoina(field "=", value); \
730 IOVEC_SET_STRING(iovec[n++], k); \
733 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
734 if (!sd_id128_is_null(value)) { \
736 k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
737 sd_id128_to_string(value, stpcpy(k, field "=")); \
738 IOVEC_SET_STRING(iovec[n++], k); \
741 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
742 if (value_size > 0) { \
744 k = newa(char, strlen(field "=") + value_size + 1); \
745 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
746 IOVEC_SET_STRING(iovec[n++], k); \
/* Completes an entry with trusted metadata and writes it to the journal.
 *
 * 'iovec' holds 'n' fields supplied by the caller and has room for 'm'
 * elements; the assertion below checks that the metadata fields still fit.
 * The following fields are appended, each only if its value is set:
 *
 *  - fields describing the sending client 'c' (_PID, _UID, _GID, _COMM,
 *    _EXE, _CMDLINE, _CAP_EFFECTIVE, _SELINUX_CONTEXT, _AUDIT_*, _SYSTEMD_*),
 *  - if 'object_pid' is valid and a client context can be obtained for it,
 *    the same set with an OBJECT_ prefix,
 *  - _SOURCE_REALTIME_TIMESTAMP formatted from 'tv',
 *  - the cached boot ID, machine ID and hostname fields of the server.
 *
 * The target journal UID follows s->split_mode: the client's UID for
 * SPLIT_UID, the session owner UID for SPLIT_LOGIN (only for non-root
 * clients). The entry is then handed to write_to_journal().
 *
 * All IOVEC_ADD_*_FIELD() macros add the "=" separator themselves, hence only
 * bare field names are passed to them. */
749 static void dispatch_message_real(
751 struct iovec
*iovec
, unsigned n
, unsigned m
,
752 const ClientContext
*c
,
753 const struct timeval
*tv
,
757 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
764 assert(n
+ N_IOVEC_META_FIELDS
+ (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
767 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
768 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
769 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
771 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
772 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
773 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
774 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
776 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
778 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
779 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
781 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
782 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
784 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
785 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
786 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
787 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
789 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
794 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
796 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
797 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
798 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
800 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
801 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
802 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
803 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
805 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
807 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
808 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
810 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
811 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
812 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
813 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
814 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
815 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
816 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
/* Bare field name only: the macro appends "=" itself. Passing a trailing "="
 * here would store the field as "OBJECT_SYSTEMD_INVOCATION_ID==<id>". */
818 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID");
824 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
825 IOVEC_SET_STRING(iovec
[n
++], source_time
);
828 /* Note that strictly speaking storing the boot id here is
829 * redundant since the entry includes this in-line
830 * anyway. However, we need this indexed, too. */
831 if (!isempty(s
->boot_id_field
))
832 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
834 if (!isempty(s
->machine_id_field
))
835 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
837 if (!isempty(s
->hostname_field
))
838 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
842 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
843 /* Split up strictly by (non-root) UID */
844 journal_uid
= c
->uid
;
845 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
846 /* Split up by login UIDs. We do this only if the
847 * realuid is not root, in order not to accidentally
848 * leak privileged information to the user that is
849 * logged by a privileged process that is part of an
850 * unprivileged session. */
851 journal_uid
= c
->owner_uid
;
855 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
/* Logs a message generated by journald itself ("driver" transport).
 *
 * The entry is pre-filled with SYSLOG_FACILITY=3 (LOG_DAEMON),
 * SYSLOG_IDENTIFIER=systemd-journald, _TRANSPORT=driver and PRIORITY=6
 * (LOG_INFO); the two numeric literals are guarded by assert_cc(). The
 * optional 'message_id' ("MESSAGE_ID=..." string) is added, then the
 * printf-style 'format' and its arguments are expanded into further fields
 * by log_format_iovec(). The entry is dispatched with the server's own
 * client context (s->my_context).
 *
 * If formatting failed, a replacement entry with PRIORITY=4 and the text
 * "Entry printing failed: <error>" is dispatched instead. Fields allocated
 * during formatting are freed afterwards. */
858 void server_driver_message(Server
*s
, const char *message_id
, const char *format
, ...) {
860 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
868 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
869 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
870 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
872 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
873 assert_cc(6 == LOG_INFO
);
874 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
877 IOVEC_SET_STRING(iovec
[n
++], message_id
);
880 va_start(ap
, format
);
881 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
882 /* Error handling below */
886 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), s
->my_context
, NULL
, LOG_INFO
, 0);
889 free(iovec
[m
++].iov_base
);
892 /* We failed to format the message. Emit a warning instead. */
895 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
898 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
899 IOVEC_SET_STRING(iovec
[n
++], buf
);
900 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), s
->my_context
, NULL
, LOG_INFO
, 0);
/* Entry point for messages received from clients: applies the storage level
 * filter and rate limiting, then hands the entry to dispatch_message_real().
 *
 * Checks visible in the body: the message priority is compared against
 * s->max_level_store, and STORAGE_NONE is tested for an early exit. For rate
 * limiting the currently available journal space is determined and passed to
 * journal_rate_limit_test() together with the client's unit and the priority
 * (masked with LOG_PRIMASK). A driver message with MESSAGE_ID
 * SD_MESSAGE_JOURNAL_DROPPED_STR reports how many messages of that unit were
 * suppressed (rl - 1). */
904 void server_dispatch_message(
906 struct iovec
*iovec
, unsigned n
, unsigned m
,
908 const struct timeval
*tv
,
912 uint64_t available
= 0;
916 assert(iovec
|| n
== 0);
921 if (LOG_PRI(priority
) > s
->max_level_store
)
924 /* Stop early in case the information will not be stored
926 if (s
->storage
== STORAGE_NONE
)
930 (void) determine_space(s
, &available
, NULL
);
932 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
936 /* Write a suppression message if we suppressed something */
938 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
939 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, c
->unit
),
943 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
/* Copies all entries of the runtime journal (/run) into the system journal
 * (/var) and then drops the runtime journal.
 *
 * Preconditions checked in the body: storage mode is STORAGE_AUTO or
 * STORAGE_PERSISTENT, a runtime journal is open and, if 'require_flag_file'
 * is set, the "flushed" flag file exists. system_journal_open(s, true) is
 * then called; a system journal must be open afterwards.
 *
 * The runtime journal is read through sd_journal_open() with
 * SD_JOURNAL_RUNTIME_ONLY and no data threshold. Each entry is copied with
 * journal_file_copy_entry(); if that fails with an error that
 * shall_try_append_again() accepts, the server is vacuumed and the copy is
 * retried once, provided the system journal is still open.
 *
 * Finally the runtime journal file is closed, /run/log/journal is removed
 * and a driver message reports the time spent and the number of entries. */
946 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
948 sd_journal
*j
= NULL
;
949 char ts
[FORMAT_TIMESPAN_MAX
];
956 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
959 if (!s
->runtime_journal
)
962 if (require_flag_file
&& !flushed_flag_is_set())
965 (void) system_journal_open(s
, true);
967 if (!s
->system_journal
)
970 log_debug("Flushing to /var...");
972 start
= now(CLOCK_MONOTONIC
);
974 r
= sd_id128_get_machine(&machine
);
978 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
980 return log_error_errno(r
, "Failed to read runtime journal: %m");
982 sd_journal_set_data_threshold(j
, 0);
984 SD_JOURNAL_FOREACH(j
) {
989 assert(f
&& f
->current_offset
> 0);
993 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
995 log_error_errno(r
, "Can't read entry: %m");
999 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1003 if (!shall_try_append_again(s
->system_journal
, r
)) {
1004 log_error_errno(r
, "Can't write entry: %m");
1009 server_vacuum(s
, false);
1011 if (!s
->system_journal
) {
1012 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1017 log_debug("Retrying write.");
1018 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1020 log_error_errno(r
, "Can't write entry: %m");
1028 journal_file_post_change(s
->system_journal
);
1030 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1033 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1035 sd_journal_close(j
);
1037 server_driver_message(s
, NULL
,
1038 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1039 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* Event loop callback for the native, syslog and audit datagram sockets.
 *
 * Only a plain EPOLLIN event is accepted; anything else is logged as an
 * error. The receive buffer s->buffer is grown (GREEDY_REALLOC) to a
 * page-aligned size derived from the SIOCINQ result and the maximum audit
 * message size, keeping one byte spare for a trailing NUL. The datagram is
 * read with recvmsg() (MSG_DONTWAIT|MSG_CMSG_CLOEXEC); EINTR/EAGAIN are
 * handled separately from other errors, which are logged.
 *
 * Control messages provide the sender credentials, the SELinux label, the
 * receive timestamp and passed file descriptors. The payload is then handed
 * to the handler matching 'fd':
 *   syslog socket: server_process_syslog_message(), no fds accepted
 *   native socket: server_process_native_message(), or
 *                  server_process_native_file() for an empty datagram that
 *                  carries exactly one fd
 *   audit socket:  server_process_audit_message(), no fds accepted
 * Unexpected file descriptors only cause a warning. All received fds are
 * closed with close_many() at the end. */
1046 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1047 Server
*s
= userdata
;
1048 struct ucred
*ucred
= NULL
;
1049 struct timeval
*tv
= NULL
;
1050 struct cmsghdr
*cmsg
;
1052 size_t label_len
= 0, m
;
1055 int *fds
= NULL
, v
= 0;
1059 struct cmsghdr cmsghdr
;
1061 /* We use NAME_MAX space for the SELinux label
1062 * here. The kernel currently enforces no
1063 * limit, but according to suggestions from
1064 * the SELinux people this will change and it
1065 * will probably be identical to NAME_MAX. For
1066 * now we use that, but this should be updated
1067 * one day when the final limit is known. */
1068 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1069 CMSG_SPACE(sizeof(struct timeval
)) +
1070 CMSG_SPACE(sizeof(int)) + /* fd */
1071 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1074 union sockaddr_union sa
= {};
1076 struct msghdr msghdr
= {
1079 .msg_control
= &control
,
1080 .msg_controllen
= sizeof(control
),
1082 .msg_namelen
= sizeof(sa
),
1086 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1088 if (revents
!= EPOLLIN
) {
1089 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1093 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1095 (void) ioctl(fd
, SIOCINQ
, &v
);
1097 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1098 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1100 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1102 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1105 iovec
.iov_base
= s
->buffer
;
1106 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1108 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1110 if (errno
== EINTR
|| errno
== EAGAIN
)
1113 return log_error_errno(errno
, "recvmsg() failed: %m");
/* Pick up credentials, security label, timestamp and passed fds from the ancillary data */
1116 CMSG_FOREACH(cmsg
, &msghdr
) {
1118 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1119 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1120 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1121 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1122 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1123 cmsg
->cmsg_type
== SCM_SECURITY
) {
1124 label
= (char*) CMSG_DATA(cmsg
);
1125 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1126 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1127 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1128 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1129 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1130 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1131 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1132 fds
= (int*) CMSG_DATA(cmsg
);
1133 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1137 /* And a trailing NUL, just in case */
1140 if (fd
== s
->syslog_fd
) {
1141 if (n
> 0 && n_fds
== 0)
1142 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1144 log_warning("Got file descriptors via syslog socket. Ignoring.");
1146 } else if (fd
== s
->native_fd
) {
1147 if (n
> 0 && n_fds
== 0)
1148 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1149 else if (n
== 0 && n_fds
== 1)
1150 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1152 log_warning("Got too many file descriptors via native socket. Ignoring.");
1155 assert(fd
== s
->audit_fd
);
1157 if (n
> 0 && n_fds
== 0)
1158 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1160 log_warning("Got file descriptors via audit socket. Ignoring.");
1163 close_many(fds
, n_fds
);
/* SIGUSR1 handler: flush request.
 *
 * Logs the requesting PID, flushes the runtime journal to /var (without
 * requiring the flag file), vacuums, touches the flag file
 * /run/systemd/journal/flushed (a failure only yields a warning) and emits a
 * space usage message for the storage currently in use (storage == NULL). */
1167 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1168 Server
*s
= userdata
;
1173 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1175 (void) server_flush_to_var(s
, false);
1177 server_vacuum(s
, false);
1179 r
= touch("/run/systemd/journal/flushed");
1181 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1183 server_space_usage_message(s
, NULL
);
/* SIGUSR2 handler: rotation request.
 *
 * Logs the requesting PID, runs a verbose vacuum and re-patches min_use for
 * every storage whose journal is open. The current CLOCK_MONOTONIC time is
 * then written to /run/systemd/journal/rotated so that clients can tell when
 * the last rotation happened; a failure only yields a warning.
 * NOTE(review): the actual rotation call is not visible in this excerpt;
 * confirm it precedes the vacuum. */
1187 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1188 Server
*s
= userdata
;
1193 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1195 server_vacuum(s
, true);
1197 if (s
->system_journal
)
1198 patch_min_use(&s
->system_storage
);
1199 if (s
->runtime_journal
)
1200 patch_min_use(&s
->runtime_storage
);
1202 /* Let clients know when the most recent rotation happened. */
1203 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1205 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* SIGTERM/SIGINT handler: logs the received signal at LOG_INFO level and asks
 * the event loop to exit with code 0. */
1210 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1211 Server
*s
= userdata
;
1215 log_received_signal(LOG_INFO
, si
);
1217 sd_event_exit(s
->event
, 0);
/* SIGRTMIN+1 handler: sync request.
 *
 * Logs the requesting PID and writes the current CLOCK_MONOTONIC time to
 * /run/systemd/journal/synced so that clients can tell when the last sync
 * happened; a failure only yields a warning.
 * NOTE(review): the sync call itself is not visible in this excerpt; confirm
 * it happens before the timestamp is written. */
1221 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1222 Server
*s
= userdata
;
1227 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1231 /* Let clients know when the most recent sync happened. */
1232 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1234 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1239 static int setup_signals(Server
*s
) {
1244 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1246 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1250 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1254 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1258 /* Let's process SIGTERM late, so that we flush all queued
1259 * messages to disk before we exit */
1260 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1264 /* When journald is invoked on the terminal (when debugging),
1265 * it's useful if C-c is handled equivalent to SIGTERM. */
1266 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1270 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1274 /* SIGRTMIN+1 causes an immediate sync. We process this very
1275 * late, so that everything else queued at this point is
1276 * really written to disk. Clients can watch
1277 * /run/systemd/journal/synced with inotify until its mtime
1278 * changes to see when a sync happened. */
1279 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1283 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1290 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1296 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1298 r
= value
? parse_boolean(value
) : true;
1300 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1302 s
->forward_to_syslog
= r
;
1304 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1306 r
= value
? parse_boolean(value
) : true;
1308 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1310 s
->forward_to_kmsg
= r
;
1312 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1314 r
= value
? parse_boolean(value
) : true;
1316 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1318 s
->forward_to_console
= r
;
1320 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1322 r
= value
? parse_boolean(value
) : true;
1324 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1326 s
->forward_to_wall
= r
;
1328 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1330 if (proc_cmdline_value_missing(key
, value
))
1333 r
= log_level_from_string(value
);
1335 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1337 s
->max_level_console
= r
;
1339 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1341 if (proc_cmdline_value_missing(key
, value
))
1344 r
= log_level_from_string(value
);
1346 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1348 s
->max_level_store
= r
;
1350 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1352 if (proc_cmdline_value_missing(key
, value
))
1355 r
= log_level_from_string(value
);
1357 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1359 s
->max_level_syslog
= r
;
1361 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1363 if (proc_cmdline_value_missing(key
, value
))
1366 r
= log_level_from_string(value
);
1368 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1370 s
->max_level_kmsg
= r
;
1372 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1374 if (proc_cmdline_value_missing(key
, value
))
1377 r
= log_level_from_string(value
);
1379 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1381 s
->max_level_wall
= r
;
1383 } else if (startswith(key
, "systemd.journald"))
1384 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1386 /* do not warn about state here, since probably systemd already did */
1390 static int server_parse_config_file(Server
*s
) {
1393 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1394 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1396 config_item_perf_lookup
, journald_gperf_lookup
,
1400 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1401 Server
*s
= userdata
;
1409 int server_schedule_sync(Server
*s
, int priority
) {
1414 if (priority
<= LOG_CRIT
) {
1415 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1420 if (s
->sync_scheduled
)
1423 if (s
->sync_interval_usec
> 0) {
1426 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1430 when
+= s
->sync_interval_usec
;
1432 if (!s
->sync_event_source
) {
1433 r
= sd_event_add_time(
1435 &s
->sync_event_source
,
1438 server_dispatch_sync
, s
);
1442 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1444 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1448 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1453 s
->sync_scheduled
= true;
1459 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1460 Server
*s
= userdata
;
1464 server_cache_hostname(s
);
1468 static int server_open_hostname(Server
*s
) {
1473 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1474 if (s
->hostname_fd
< 0)
1475 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1477 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1479 /* kernels prior to 3.2 don't support polling this file. Ignore
1482 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1483 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1487 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1490 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1492 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1497 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1498 Server
*s
= userdata
;
1502 assert(s
->notify_event_source
== es
);
1503 assert(s
->notify_fd
== fd
);
1505 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1506 * message on it. Either it's the watchdog event, the initial
1507 * READY=1 event or an stdout stream event. If there's nothing
1508 * to write anymore, turn our event source off. The next time
1509 * there's something to send it will be turned on again. */
1511 if (!s
->sent_notify_ready
) {
1512 static const char p
[] =
1514 "STATUS=Processing requests...";
1517 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1519 if (errno
== EAGAIN
)
1522 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1525 s
->sent_notify_ready
= true;
1526 log_debug("Sent READY=1 notification.");
1528 } else if (s
->send_watchdog
) {
1530 static const char p
[] =
1535 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1537 if (errno
== EAGAIN
)
1540 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1543 s
->send_watchdog
= false;
1544 log_debug("Sent WATCHDOG=1 notification.");
1546 } else if (s
->stdout_streams_notify_queue
)
1547 /* Dispatch one stream notification event */
1548 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1550 /* Leave us enabled if there's still more to do. */
1551 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1554 /* There was nothing to do anymore, let's turn ourselves off. */
1555 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1557 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1562 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1563 Server
*s
= userdata
;
1568 s
->send_watchdog
= true;
1570 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1572 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1574 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1576 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1578 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1580 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1585 static int server_connect_notify(Server
*s
) {
1586 union sockaddr_union sa
= {
1587 .un
.sun_family
= AF_UNIX
,
1593 assert(s
->notify_fd
< 0);
1594 assert(!s
->notify_event_source
);
1597 So here's the problem: we'd like to send notification
1598 messages to PID 1, but we cannot do that via sd_notify(),
1599 since that's synchronous, and we might end up blocking on
1600 it. Specifically: given that PID 1 might block on
1601 dbus-daemon during IPC, and dbus-daemon is logging to us,
1602 and might hence block on us, we might end up in a deadlock
1603 if we block on sending PID 1 notification messages — by
1604 generating a full blocking circle. To avoid this, let's
1605 create a non-blocking socket, and connect it to the
1606 notification socket, and then wait for POLLOUT before we
1607 send anything. This should efficiently avoid any deadlocks,
1608 as we'll never block on PID 1, hence PID 1 can safely block
1609 on dbus-daemon which can safely block on us again.
1611 Don't think that this issue is real? It is, see:
1612 https://github.com/systemd/systemd/issues/1505
1615 e
= getenv("NOTIFY_SOCKET");
1619 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1620 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1624 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1625 log_error("NOTIFY_SOCKET path too long: %s", e
);
1629 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1630 if (s
->notify_fd
< 0)
1631 return log_error_errno(errno
, "Failed to create notify socket: %m");
1633 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1635 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1636 if (sa
.un
.sun_path
[0] == '@')
1637 sa
.un
.sun_path
[0] = 0;
1639 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1641 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1643 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1645 return log_error_errno(r
, "Failed to watch notification socket: %m");
1647 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1648 s
->send_watchdog
= true;
1650 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1652 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1655 /* This should fire pretty soon, which we'll use to send the
1661 int server_init(Server
*s
) {
1662 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1669 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1672 s
->read_kmsg
= true;
1674 s
->watchdog_usec
= USEC_INFINITY
;
1676 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1677 s
->sync_scheduled
= false;
1679 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1680 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1682 s
->forward_to_wall
= true;
1684 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1686 s
->max_level_store
= LOG_DEBUG
;
1687 s
->max_level_syslog
= LOG_DEBUG
;
1688 s
->max_level_kmsg
= LOG_NOTICE
;
1689 s
->max_level_console
= LOG_INFO
;
1690 s
->max_level_wall
= LOG_EMERG
;
1692 journal_reset_metrics(&s
->system_storage
.metrics
);
1693 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1695 server_parse_config_file(s
);
1697 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1699 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1701 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1702 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1703 s
->rate_limit_interval
, s
->rate_limit_burst
);
1704 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1707 (void) mkdir_p("/run/systemd/journal", 0755);
1709 s
->user_journals
= ordered_hashmap_new(NULL
);
1710 if (!s
->user_journals
)
1713 s
->mmap
= mmap_cache_new();
1717 s
->deferred_closes
= set_new(NULL
);
1718 if (!s
->deferred_closes
)
1721 r
= sd_event_default(&s
->event
);
1723 return log_error_errno(r
, "Failed to create event loop: %m");
1725 n
= sd_listen_fds(true);
1727 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1729 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1731 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1733 if (s
->native_fd
>= 0) {
1734 log_error("Too many native sockets passed.");
1740 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1742 if (s
->stdout_fd
>= 0) {
1743 log_error("Too many stdout sockets passed.");
1749 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1750 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1752 if (s
->syslog_fd
>= 0) {
1753 log_error("Too many /dev/log sockets passed.");
1759 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1761 if (s
->audit_fd
>= 0) {
1762 log_error("Too many audit sockets passed.");
1776 r
= fdset_put(fds
, fd
);
1782 /* Try to restore streams, but don't bother if this fails */
1783 (void) server_restore_streams(s
, fds
);
1785 if (fdset_size(fds
) > 0) {
1786 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1787 fds
= fdset_free(fds
);
1790 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1792 /* always open stdout, syslog, native, and kmsg sockets */
1794 /* systemd-journald.socket: /run/systemd/journal/stdout */
1795 r
= server_open_stdout_socket(s
);
1799 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1800 r
= server_open_syslog_socket(s
);
1804 /* systemd-journald.socket: /run/systemd/journal/socket */
1805 r
= server_open_native_socket(s
);
1810 r
= server_open_dev_kmsg(s
);
1814 /* Unless we got *some* sockets and not audit, open audit socket */
1815 if (s
->audit_fd
>= 0 || no_sockets
) {
1816 r
= server_open_audit(s
);
1821 r
= server_open_kernel_seqnum(s
);
1825 r
= server_open_hostname(s
);
1829 r
= setup_signals(s
);
1833 s
->udev
= udev_new();
1837 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1841 r
= cg_get_root_path(&s
->cgroup_root
);
1845 server_cache_hostname(s
);
1846 server_cache_boot_id(s
);
1847 server_cache_machine_id(s
);
1849 s
->runtime_storage
.name
= "Runtime journal";
1850 s
->system_storage
.name
= "System journal";
1852 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1853 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1854 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1857 (void) server_connect_notify(s
);
1859 (void) client_context_acquire_default(s
);
1861 return system_journal_open(s
, false);
1864 void server_maybe_append_tags(Server
*s
) {
1870 n
= now(CLOCK_REALTIME
);
1872 if (s
->system_journal
)
1873 journal_file_maybe_append_tag(s
->system_journal
, n
);
1875 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1876 journal_file_maybe_append_tag(f
, n
);
1880 void server_done(Server
*s
) {
1884 if (s
->deferred_closes
) {
1885 journal_file_close_set(s
->deferred_closes
);
1886 set_free(s
->deferred_closes
);
1889 while (s
->stdout_streams
)
1890 stdout_stream_free(s
->stdout_streams
);
1892 client_context_flush_all(s
);
1894 if (s
->system_journal
)
1895 (void) journal_file_close(s
->system_journal
);
1897 if (s
->runtime_journal
)
1898 (void) journal_file_close(s
->runtime_journal
);
1900 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1901 (void) journal_file_close(f
);
1903 ordered_hashmap_free(s
->user_journals
);
1905 sd_event_source_unref(s
->syslog_event_source
);
1906 sd_event_source_unref(s
->native_event_source
);
1907 sd_event_source_unref(s
->stdout_event_source
);
1908 sd_event_source_unref(s
->dev_kmsg_event_source
);
1909 sd_event_source_unref(s
->audit_event_source
);
1910 sd_event_source_unref(s
->sync_event_source
);
1911 sd_event_source_unref(s
->sigusr1_event_source
);
1912 sd_event_source_unref(s
->sigusr2_event_source
);
1913 sd_event_source_unref(s
->sigterm_event_source
);
1914 sd_event_source_unref(s
->sigint_event_source
);
1915 sd_event_source_unref(s
->sigrtmin1_event_source
);
1916 sd_event_source_unref(s
->hostname_event_source
);
1917 sd_event_source_unref(s
->notify_event_source
);
1918 sd_event_source_unref(s
->watchdog_event_source
);
1919 sd_event_unref(s
->event
);
1921 safe_close(s
->syslog_fd
);
1922 safe_close(s
->native_fd
);
1923 safe_close(s
->stdout_fd
);
1924 safe_close(s
->dev_kmsg_fd
);
1925 safe_close(s
->audit_fd
);
1926 safe_close(s
->hostname_fd
);
1927 safe_close(s
->notify_fd
);
1930 journal_rate_limit_free(s
->rate_limit
);
1932 if (s
->kernel_seqnum
)
1933 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1937 free(s
->cgroup_root
);
1938 free(s
->hostname_field
);
1939 free(s
->runtime_storage
.path
);
1940 free(s
->system_storage
.path
);
1943 mmap_cache_unref(s
->mmap
);
1945 udev_unref(s
->udev
);
1948 static const char* const storage_table
[_STORAGE_MAX
] = {
1949 [STORAGE_AUTO
] = "auto",
1950 [STORAGE_VOLATILE
] = "volatile",
1951 [STORAGE_PERSISTENT
] = "persistent",
1952 [STORAGE_NONE
] = "none"
1955 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1956 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
1958 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1959 [SPLIT_LOGIN
] = "login",
1960 [SPLIT_UID
] = "uid",
1961 [SPLIT_NONE
] = "none",
1964 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1965 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");