1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <selinux/selinux.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
25 #include "format-util.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
44 #include "missing_audit.h"
46 #include "parse-util.h"
47 #include "path-util.h"
48 #include "proc-cmdline.h"
49 #include "process-util.h"
51 #include "selinux-util.h"
52 #include "signal-util.h"
53 #include "socket-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "syslog-util.h"
58 #include "user-util.h"
60 #define USER_JOURNALS_MAX 1024
62 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
63 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
64 #define DEFAULT_RATE_LIMIT_BURST 10000
65 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
67 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
69 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
71 /* The period to insert between posting changes for coalescing */
72 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
74 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
75 * for a bit of additional metadata. */
76 #define DEFAULT_LINE_MAX (48*1024)
78 #define DEFERRED_CLOSES_MAX (4096)
80 #define IDLE_TIMEOUT_USEC (30*USEC_PER_SEC)
82 static int determine_path_usage(
88 _cleanup_closedir_
DIR *d
= NULL
;
99 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
100 errno
, "Failed to open %s: %m", path
);
102 if (fstatvfs(dirfd(d
), &ss
) < 0)
103 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
105 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
107 FOREACH_DIRENT_ALL(de
, d
, break) {
110 if (!endswith(de
->d_name
, ".journal") &&
111 !endswith(de
->d_name
, ".journal~"))
114 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
115 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
119 if (!S_ISREG(st
.st_mode
))
122 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
128 static void cache_space_invalidate(JournalStorageSpace
*space
) {
/* Refreshes the cached disk space figures kept in storage->space, based on the usage of the
 * directory storage->path as reported by determine_path_usage() and on the limits configured in
 * storage->metrics. The figures are stamped with the CLOCK_MONOTONIC time of the refresh. */
132 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
133 JournalStorageSpace
*space
;
134 JournalMetrics
*metrics
;
135 uint64_t vfs_used
, vfs_avail
, avail
;
141 metrics
= &storage
->metrics
;
142 space
= &storage
->space
;
144 ts
= now(CLOCK_MONOTONIC
);
/* True when a previous refresh happened (non-zero timestamp) and it is less than
 * RECHECK_SPACE_USEC old. */
146 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
149 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
153 space
->vfs_used
= vfs_used
;
154 space
->vfs_available
= vfs_avail
;
/* Free space on the file system beyond the keep_free reserve.
 * NOTE(review): LESS_BY() presumably clamps at zero instead of wrapping around - confirm against
 * its definition. */
156 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
/* The limit is the current usage plus that free space, raised to at least min_use and then capped
 * at max_use. */
158 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
/* What may still be written: the limit minus what is already in use. */
159 space
->available
= LESS_BY(space
->limit
, vfs_used
);
160 space
->timestamp
= ts
;
164 static void patch_min_use(JournalStorage
*storage
) {
167 /* Let's bump the min_use limit to the current usage on disk. We do
168 * this when starting up and first opening the journal files. This way
169 * sudden spikes in disk usage will not cause journald to vacuum files
170 * without bounds. Note that this means that only a restart of journald
171 * will make it reset this value. */
173 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Returns the storage descriptor that matches the journal currently in use: the system storage if
 * s->system_journal is set, the runtime storage otherwise. */
176 static JournalStorage
* server_current_storage(Server
*s
) {
179 return s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
/* Reports the space figures of the storage currently in use (as chosen by
 * server_current_storage()), after calling cache_space_refresh() on it.
 *
 * *available receives js->space.available, *limit receives js->space.limit.
 *
 * NOTE(review): server_dispatch_message() calls this with limit == NULL; the guards protecting the
 * two stores below are not visible in this excerpt - confirm they exist. */
182 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
188 js
= server_current_storage(s
);
190 r
= cache_space_refresh(s
, js
);
193 *available
= js
->space
.available
;
195 *limit
= js
->space
.limit
;
200 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
201 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
202 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
203 JournalMetrics
*metrics
;
208 storage
= server_current_storage(s
);
210 if (cache_space_refresh(s
, storage
) < 0)
213 metrics
= &storage
->metrics
;
214 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
215 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
216 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
217 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
218 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
219 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
221 server_driver_message(s
, 0,
222 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
223 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
224 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
225 "JOURNAL_NAME=%s", storage
->name
,
226 "JOURNAL_PATH=%s", storage
->path
,
227 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
228 "CURRENT_USE_PRETTY=%s", fb1
,
229 "MAX_USE=%"PRIu64
, metrics
->max_use
,
230 "MAX_USE_PRETTY=%s", fb2
,
231 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
232 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
233 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
234 "DISK_AVAILABLE_PRETTY=%s", fb4
,
235 "LIMIT=%"PRIu64
, storage
->space
.limit
,
236 "LIMIT_PRETTY=%s", fb5
,
237 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
238 "AVAILABLE_PRETTY=%s", fb6
,
242 static bool uid_for_system_journal(uid_t uid
) {
244 /* Returns true if the specified UID shall get its data stored in the system journal, i.e. if it
 * is a system UID, a dynamic UID, or UID_NOBODY. */
246 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
249 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
256 if (uid_for_system_journal(uid
))
259 r
= add_acls_for_user(f
->fd
, uid
);
261 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
265 static int open_journal(
271 JournalMetrics
*metrics
,
274 _cleanup_(journal_file_closep
) JournalFile
*f
= NULL
;
282 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
283 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
285 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
286 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
291 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
299 static bool flushed_flag_is_set(Server
*s
) {
304 /* We don't support the "flushing" concept for namespace instances, we assume them to always have
309 fn
= strjoina(s
->runtime_directory
, "/flushed");
310 return access(fn
, F_OK
) >= 0;
313 static int system_journal_open(Server
*s
, bool flush_requested
, bool relinquish_requested
) {
317 if (!s
->system_journal
&&
318 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
319 (flush_requested
|| flushed_flag_is_set(s
)) &&
320 !relinquish_requested
) {
322 /* If in auto mode: first try to create the machine path, but not the prefix.
324 * If in persistent mode: create /var/log/journal and the machine path */
326 if (s
->storage
== STORAGE_PERSISTENT
)
327 (void) mkdir_parents(s
->system_storage
.path
, 0755);
329 (void) mkdir(s
->system_storage
.path
, 0755);
331 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
332 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
334 server_add_acls(s
->system_journal
, 0);
335 (void) cache_space_refresh(s
, &s
->system_storage
);
336 patch_min_use(&s
->system_storage
);
338 if (!IN_SET(r
, -ENOENT
, -EROFS
))
339 log_warning_errno(r
, "Failed to open system journal: %m");
344 /* If the runtime journal is open, and we're post-flush, we're recovering from a failed
345 * system journal rotate (ENOSPC) for which the runtime journal was reopened.
347 * Perform an implicit flush to var, leaving the runtime journal closed, now that the system
350 if (!flush_requested
)
351 (void) server_flush_to_var(s
, true);
354 if (!s
->runtime_journal
&&
355 (s
->storage
!= STORAGE_NONE
)) {
357 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
359 if (s
->system_journal
&& !relinquish_requested
) {
361 /* Try to open the runtime journal, but only
362 * if it already exists, so that we can flush
363 * it into the system journal */
365 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
368 log_warning_errno(r
, "Failed to open runtime journal: %m");
375 /* OK, we really need the runtime journal, so create it if necessary. */
377 (void) mkdir_parents(s
->runtime_storage
.path
, 0755);
378 (void) mkdir(s
->runtime_storage
.path
, 0750);
380 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
382 return log_error_errno(r
, "Failed to open runtime journal: %m");
385 if (s
->runtime_journal
) {
386 server_add_acls(s
->runtime_journal
, 0);
387 (void) cache_space_refresh(s
, &s
->runtime_storage
);
388 patch_min_use(&s
->runtime_storage
);
395 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
396 _cleanup_free_
char *p
= NULL
;
402 /* A rotate that fails to create the new journal (ENOSPC) leaves the rotated journal as NULL. Unless
403 * we revisit opening, even after space is made available we'll continue to return NULL indefinitely.
405 * system_journal_open() is a noop if the journals are already open, so we can just call it here to
406 * recover from failed rotates (or anything else that's left the journals as NULL).
408 * Fixes https://github.com/systemd/systemd/issues/3968 */
409 (void) system_journal_open(s
, false, false);
411 /* We split up user logs only on /var, not on /run. If the runtime file is open, we write to it
412 * exclusively, in order to guarantee proper order as soon as we flush /run to /var and close the
415 if (s
->runtime_journal
)
416 return s
->runtime_journal
;
418 if (uid_for_system_journal(uid
))
419 return s
->system_journal
;
421 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
425 if (asprintf(&p
, "%s/user-" UID_FMT
".journal", s
->system_storage
.path
, uid
) < 0) {
427 return s
->system_journal
;
430 /* Too many open? Then let's close one (or more) */
431 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
432 assert_se(f
= ordered_hashmap_steal_first(s
->user_journals
));
433 (void) journal_file_close(f
);
436 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
438 return s
->system_journal
;
440 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
442 (void) journal_file_close(f
);
443 return s
->system_journal
;
446 server_add_acls(f
, uid
);
450 static int do_rotate(
463 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
466 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
468 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
471 server_add_acls(*f
, uid
);
475 static void server_process_deferred_closes(Server
*s
) {
479 /* Perform any deferred closes which aren't still offlining. */
480 SET_FOREACH(f
, s
->deferred_closes
, i
) {
481 if (journal_file_is_offlining(f
))
484 (void) set_remove(s
->deferred_closes
, f
);
485 (void) journal_file_close(f
);
489 static void server_vacuum_deferred_closes(Server
*s
) {
492 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
493 if (set_size(s
->deferred_closes
) < DEFERRED_CLOSES_MAX
)
496 /* Let's first remove all journal files that might already have completed closing */
497 server_process_deferred_closes(s
);
499 /* And now, let's close some more until we reach the limit again. */
500 while (set_size(s
->deferred_closes
) >= DEFERRED_CLOSES_MAX
) {
503 assert_se(f
= set_steal_first(s
->deferred_closes
));
504 journal_file_close(f
);
508 static int vacuum_offline_user_journals(Server
*s
) {
509 _cleanup_closedir_
DIR *d
= NULL
;
514 d
= opendir(s
->system_storage
.path
);
519 return log_error_errno(errno
, "Failed to open %s: %m", s
->system_storage
.path
);
523 _cleanup_free_
char *u
= NULL
, *full
= NULL
;
524 _cleanup_close_
int fd
= -1;
531 de
= readdir_no_dot(d
);
534 log_warning_errno(errno
, "Failed to enumerate %s, ignoring: %m", s
->system_storage
.path
);
539 a
= startswith(de
->d_name
, "user-");
542 b
= endswith(de
->d_name
, ".journal");
550 r
= parse_uid(u
, &uid
);
552 log_debug_errno(r
, "Failed to parse UID from file name '%s', ignoring: %m", de
->d_name
);
556 /* Already rotated in the above loop? i.e. is it an open user journal? */
557 if (ordered_hashmap_contains(s
->user_journals
, UID_TO_PTR(uid
)))
560 full
= path_join(s
->system_storage
.path
, de
->d_name
);
564 fd
= openat(dirfd(d
), de
->d_name
, O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
|O_NONBLOCK
);
566 log_full_errno(IN_SET(errno
, ELOOP
, ENOENT
) ? LOG_DEBUG
: LOG_WARNING
, errno
,
567 "Failed to open journal file '%s' for rotation: %m", full
);
571 /* Make some room in the set of deferred close()s */
572 server_vacuum_deferred_closes(s
);
574 /* Open the file briefly, so that we can archive it */
575 r
= journal_file_open(fd
,
580 s
->compress
.threshold_bytes
,
582 &s
->system_storage
.metrics
,
588 log_warning_errno(r
, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full
);
590 r
= journal_file_dispose(dirfd(d
), de
->d_name
);
592 log_warning_errno(r
, "Failed to move %s out of the way, ignoring: %m", full
);
594 log_debug("Successfully moved %s out of the way.", full
);
599 TAKE_FD(fd
); /* Donated to journal_file_open() */
601 r
= journal_file_archive(f
);
603 log_debug_errno(r
, "Failed to archive journal file '%s', ignoring: %m", full
);
605 f
= journal_initiate_close(f
, s
->deferred_closes
);
/* Rotates all journals the server knows about: the runtime and system journals, every user journal
 * that is currently open, and (when no runtime journal is open) the user journals on disk that are
 * not open. Deferred closes are processed at the end. */
611 void server_rotate(Server
*s
) {
617 log_debug("Rotating...");
619 /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
620 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
621 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
623 /* Then, rotate all user journals we have open (keeping them open) */
624 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
625 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
627 ordered_hashmap_replace(s
->user_journals
, k
, f
);
629 /* Old file has been closed and deallocated */
630 ordered_hashmap_remove(s
->user_journals
, k
);
633 /* Finally, also rotate all user journals we currently do not have open. (But do so only if we
634 * actually have access to /var, i.e. are not in the log-to-runtime-journal mode). */
635 if (!s
->runtime_journal
)
636 (void) vacuum_offline_user_journals(s
);
638 server_process_deferred_closes(s
);
/* Syncs the system journal (if open) and every open user journal by calling
 * journal_file_set_offline() on them, then turns off the sync timer event source (if there is one)
 * and clears s->sync_scheduled. Failures are only logged. */
641 void server_sync(Server
*s
) {
646 if (s
->system_journal
) {
647 r
= journal_file_set_offline(s
->system_journal
, false);
649 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
652 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
653 r
= journal_file_set_offline(f
, false);
655 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
/* The sync that was scheduled has now been carried out, hence switch the timer off again. */
658 if (s
->sync_event_source
) {
659 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
661 log_error_errno(r
, "Failed to disable sync timer source: %m");
664 s
->sync_scheduled
= false;
/* Vacuums the journal directory of a single storage. The space figures are refreshed first; then
 * journal_directory_vacuum() is run on storage->path with the cached space limit, the configured
 * maximum number of files and the server's maximum retention time. s->oldest_file_usec is passed
 * by address so the vacuum call can update it. */
667 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
674 (void) cache_space_refresh(s
, storage
);
677 server_space_usage_message(s
, storage
);
679 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
680 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
681 &s
->oldest_file_usec
, verbose
);
/* -ENOENT is tolerated silently; any other failure is logged and otherwise ignored. */
682 if (r
< 0 && r
!= -ENOENT
)
683 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
/* Presumably drops the cached space figures, since vacuuming may have changed the disk usage -
 * the body of cache_space_invalidate() is not visible here. */
685 cache_space_invalidate(&storage
->space
);
/* Vacuums the storage of every journal that is currently open: the system storage if the system
 * journal is open, and the runtime storage if the runtime journal is open. s->oldest_file_usec is
 * reset to 0 beforehand; its address is handed to the vacuum call inside do_vacuum(). */
688 int server_vacuum(Server
*s
, bool verbose
) {
691 log_debug("Vacuuming...");
693 s
->oldest_file_usec
= 0;
695 if (s
->system_journal
)
696 do_vacuum(s
, &s
->system_storage
, verbose
);
697 if (s
->runtime_journal
)
698 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Queries the machine ID and stores it, formatted as a "_MACHINE_ID=<id>" journal field, in
 * s->machine_id_field. */
703 static void server_cache_machine_id(Server
*s
) {
709 r
= sd_id128_get_machine(&id
);
/* stpcpy() returns the end of the copied prefix, so the ID string is written right behind it. */
713 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Queries the boot ID and stores it, formatted as a "_BOOT_ID=<id>" journal field, in
 * s->boot_id_field. */
716 static void server_cache_boot_id(Server
*s
) {
722 r
= sd_id128_get_boot(&id
);
/* stpcpy() returns the end of the copied prefix, so the ID string is written right behind it. */
726 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Caches the current hostname as a "_HOSTNAME=<name>" journal field in s->hostname_field,
 * replacing whatever was cached there before. */
729 static void server_cache_hostname(Server
*s
) {
730 _cleanup_free_
char *t
= NULL
;
735 t
= gethostname_malloc();
739 x
= strjoin("_HOSTNAME=", t
);
/* The new string takes the place of the previously cached field. */
743 free_and_replace(s
->hostname_field
, x
);
746 static bool shall_try_append_again(JournalFile
*f
, int r
) {
749 case -E2BIG
: /* Hit configured limit */
750 case -EFBIG
: /* Hit fs limit */
751 case -EDQUOT
: /* Quota limit hit */
752 case -ENOSPC
: /* Disk full */
753 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
756 case -EIO
: /* I/O error of some kind (mmap) */
757 log_warning("%s: IO error, rotating.", f
->path
);
760 case -EHOSTDOWN
: /* Other machine */
761 log_info("%s: Journal file from other machine, rotating.", f
->path
);
764 case -EBUSY
: /* Unclean shutdown */
765 log_info("%s: Unclean shutdown, rotating.", f
->path
);
768 case -EPROTONOSUPPORT
: /* Unsupported feature */
769 log_info("%s: Unsupported feature, rotating.", f
->path
);
772 case -EBADMSG
: /* Corrupted */
773 case -ENODATA
: /* Truncated */
774 case -ESHUTDOWN
: /* Already archived */
775 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
778 case -EIDRM
: /* Journal file has been deleted */
779 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
782 case -ETXTBSY
: /* Journal file is from the future */
783 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
787 log_warning("%s: underlying file system does not support memory mapping or another required file system feature.", f
->path
);
795 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
796 bool vacuumed
= false, rotate
= false;
797 struct dual_timestamp ts
;
805 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
806 * the source time, and not even the time the event was originally seen, but instead simply the time we started
807 * processing it, as we want strictly linear ordering in what we write out.) */
808 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
809 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
811 if (ts
.realtime
< s
->last_realtime_clock
) {
812 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
813 * regular operation. However, when it does happen, then we should make sure that we start fresh files
814 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
815 * bisection works correctly. */
817 log_debug("Time jumped backwards, rotating.");
821 f
= find_journal(s
, uid
);
825 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
826 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
833 server_vacuum(s
, false);
836 f
= find_journal(s
, uid
);
841 s
->last_realtime_clock
= ts
.realtime
;
843 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
845 server_schedule_sync(s
, priority
);
849 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
850 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
855 server_vacuum(s
, false);
857 f
= find_journal(s
, uid
);
861 log_debug("Retrying write.");
862 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
864 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
866 server_schedule_sync(s
, priority
);
869 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
870 if (isset(value)) { \
872 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
873 sprintf(k, field "=" format, value); \
874 iovec[n++] = IOVEC_MAKE_STRING(k); \
877 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
878 if (!isempty(value)) { \
880 k = strjoina(field "=", value); \
881 iovec[n++] = IOVEC_MAKE_STRING(k); \
884 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
885 if (!sd_id128_is_null(value)) { \
887 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
888 sd_id128_to_string(value, stpcpy(k, field "=")); \
889 iovec[n++] = IOVEC_MAKE_STRING(k); \
892 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
893 if (value_size > 0) { \
895 k = newa(char, STRLEN(field "=") + value_size + 1); \
896 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
897 iovec[n++] = IOVEC_MAKE_STRING(k); \
900 static void dispatch_message_real(
902 struct iovec
*iovec
, size_t n
, size_t m
,
903 const ClientContext
*c
,
904 const struct timeval
*tv
,
908 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
909 _cleanup_free_
char *cmdline1
= NULL
, *cmdline2
= NULL
;
917 N_IOVEC_META_FIELDS
+
918 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
919 client_context_extra_fields_n_iovec(c
) <= m
);
922 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
923 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
924 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
926 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
927 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
930 /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
931 * Let's use a heap allocation for this one. */
932 cmdline1
= set_iovec_string_field(iovec
, &n
, "_CMDLINE=", c
->cmdline
);
934 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
935 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
936 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
937 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
939 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP"); /* A path */
940 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
941 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
942 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
943 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
944 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
945 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
947 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
949 if (c
->extra_fields_n_iovec
> 0) {
950 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
951 n
+= c
->extra_fields_n_iovec
;
957 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
959 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
960 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
961 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
963 /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
964 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
965 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
967 cmdline2
= set_iovec_string_field(iovec
, &n
, "OBJECT_CMDLINE=", o
->cmdline
);
969 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
970 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
971 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
972 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
974 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
975 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
976 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
977 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
978 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
979 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
980 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
/* IOVEC_ADD_ID128_FIELD() appends the "=" separator itself (it concatenates field "="), so the field
 * name is passed without one, exactly like "_SYSTEMD_INVOCATION_ID" for the client context. With a
 * trailing "=" here the entry would carry "OBJECT_SYSTEMD_INVOCATION_ID==<id>". */
982 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID");
988 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
989 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
992 /* Note that strictly speaking storing the boot id here is
993 * redundant since the entry includes this in-line
994 * anyway. However, we need this indexed, too. */
995 if (!isempty(s
->boot_id_field
))
996 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
998 if (!isempty(s
->machine_id_field
))
999 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
1001 if (!isempty(s
->hostname_field
))
1002 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
1004 if (!isempty(s
->namespace_field
))
1005 iovec
[n
++] = IOVEC_MAKE_STRING(s
->namespace_field
);
1009 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
1010 /* Split up strictly by (non-root) UID */
1011 journal_uid
= c
->uid
;
1012 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
1013 /* Split up by login UIDs. We do this only if the
1014 * realuid is not root, in order not to accidentally
1015 * leak privileged information to the user that is
1016 * logged by a privileged process that is part of an
1017 * unprivileged session. */
1018 journal_uid
= c
->owner_uid
;
1022 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1025 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
1027 struct iovec
*iovec
;
1035 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
1036 iovec
= newa(struct iovec
, m
);
1038 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1039 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1040 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1042 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1043 assert_cc(6 == LOG_INFO
);
1044 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
1047 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
1050 va_start(ap
, format
);
1051 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
1052 /* Error handling below */
1056 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1059 free(iovec
[k
++].iov_base
);
1062 /* We failed to format the message. Emit a warning instead. */
1065 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror_safe(r
));
1068 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
1069 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
1070 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1074 void server_dispatch_message(
1076 struct iovec
*iovec
, size_t n
, size_t m
,
1078 const struct timeval
*tv
,
1082 uint64_t available
= 0;
1086 assert(iovec
|| n
== 0);
1091 if (LOG_PRI(priority
) > s
->max_level_store
)
1094 /* Stop early in case the information will not be stored
1096 if (s
->storage
== STORAGE_NONE
)
1100 (void) determine_space(s
, &available
, NULL
);
1102 rl
= journal_ratelimit_test(s
->ratelimit
, c
->unit
, c
->log_ratelimit_interval
, c
->log_ratelimit_burst
, priority
& LOG_PRIMASK
, available
);
1106 /* Write a suppression message if we suppressed something */
1108 server_driver_message(s
, c
->pid
,
1109 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
1110 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
1111 "N_DROPPED=%i", rl
- 1,
1115 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
1118 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
1119 char ts
[FORMAT_TIMESPAN_MAX
];
1120 sd_journal
*j
= NULL
;
1128 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
1131 if (s
->namespace) /* Flushing concept does not exist for namespace instances */
1134 if (!s
->runtime_journal
) /* Nothing to flush? */
1137 if (require_flag_file
&& !flushed_flag_is_set(s
))
1140 (void) system_journal_open(s
, true, false);
1142 if (!s
->system_journal
)
1145 log_debug("Flushing to %s...", s
->system_storage
.path
);
1147 start
= now(CLOCK_MONOTONIC
);
1149 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1151 return log_error_errno(r
, "Failed to read runtime journal: %m");
1153 sd_journal_set_data_threshold(j
, 0);
1155 SD_JOURNAL_FOREACH(j
) {
1159 f
= j
->current_file
;
1160 assert(f
&& f
->current_offset
> 0);
1164 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1166 log_error_errno(r
, "Can't read entry: %m");
1170 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1174 if (!shall_try_append_again(s
->system_journal
, r
)) {
1175 log_error_errno(r
, "Can't write entry: %m");
1180 server_vacuum(s
, false);
1182 if (!s
->system_journal
) {
1183 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1188 log_debug("Retrying write.");
1189 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1191 log_error_errno(r
, "Can't write entry: %m");
1199 if (s
->system_journal
)
1200 journal_file_post_change(s
->system_journal
);
1202 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1205 (void) rm_rf(s
->runtime_storage
.path
, REMOVE_ROOT
);
1207 sd_journal_close(j
);
1209 server_driver_message(s
, 0, NULL
,
1210 LOG_MESSAGE("Time spent on flushing to %s is %s for %u entries.",
1211 s
->system_storage
.path
,
1212 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1216 fn
= strjoina(s
->runtime_directory
, "/flushed");
1219 log_warning_errno(k
, "Failed to touch %s, ignoring: %m", fn
);
1221 server_refresh_idle_timer(s
);
/* Gives up the persistent storage: calls system_journal_open() with relinquish_requested set, closes
 * the system journal, all open user journals and all files still queued for a deferred close, and
 * removes the "flushed" flag file from the runtime directory. The idle timer is refreshed at the
 * end. */
1225 static int server_relinquish_var(Server
*s
) {
1229 if (s
->storage
== STORAGE_NONE
)
1232 if (s
->namespace) /* Concept does not exist for namespaced instances */
/* Runtime journal open and no system journal open: /var is not in use at this point. */
1235 if (s
->runtime_journal
&& !s
->system_journal
)
1238 log_debug("Relinquishing %s...", s
->system_storage
.path
);
1240 (void) system_journal_open(s
, false, true);
1242 s
->system_journal
= journal_file_close(s
->system_journal
);
1243 ordered_hashmap_clear_with_destructor(s
->user_journals
, journal_file_close
);
1244 set_clear_with_destructor(s
->deferred_closes
, journal_file_close
);
/* Remove the flag file that flushed_flag_is_set() checks for; a missing file is not an error. */
1246 fn
= strjoina(s
->runtime_directory
, "/flushed");
1247 if (unlink(fn
) < 0 && errno
!= ENOENT
)
1248 log_warning_errno(errno
, "Failed to unlink %s, ignoring: %m", fn
);
1250 server_refresh_idle_timer(s
);
/* Event loop handler for a datagram arriving on the native, syslog or audit socket (the fd is asserted
 * to be one of those three).
 *
 * Any revents value other than EPOLLIN is reported as an EIO error. The receive buffer is sized from
 * the SIOCINQ result, with a lower bound derived from MAX_AUDIT_MESSAGE_LENGTH, and one byte is kept
 * free for a trailing NUL. The message is read with recvmsg_safe(MSG_DONTWAIT|MSG_CMSG_CLOEXEC).
 *
 * The control messages are then scanned for sender credentials (SCM_CREDENTIALS), a security label
 * (SCM_SECURITY), a receive timestamp (SO_TIMESTAMP) and passed file descriptors (SCM_RIGHTS). The
 * payload is handed to the syslog, native or audit processing function depending on the socket; the
 * native socket additionally accepts an empty datagram carrying exactly one fd. Unexpected fds only
 * produce a warning. Finally all received fds are closed and the idle timer is refreshed. */
1254 int server_process_datagram(
1255 sd_event_source
*es
,
1260 Server
*s
= userdata
;
1261 struct ucred
*ucred
= NULL
;
1262 struct timeval
*tv
= NULL
;
1263 struct cmsghdr
*cmsg
;
1265 size_t label_len
= 0, m
;
1268 int *fds
= NULL
, v
= 0;
1271 /* We use NAME_MAX space for the SELinux label here. The kernel currently enforces no limit, but
1272 * according to suggestions from the SELinux people this will change and it will probably be
1273 * identical to NAME_MAX. For now we use that, but this should be updated one day when the final
1274 * limit is known. */
1275 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred
)) +
1276 CMSG_SPACE(sizeof(struct timeval
)) +
1277 CMSG_SPACE(sizeof(int)) + /* fd */
1278 CMSG_SPACE(NAME_MAX
) /* selinux label */) control
;
1280 union sockaddr_union sa
= {};
1282 struct msghdr msghdr
= {
1285 .msg_control
= &control
,
1286 .msg_controllen
= sizeof(control
),
1288 .msg_namelen
= sizeof(sa
),
1292 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1294 if (revents
!= EPOLLIN
)
1295 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
1296 "Got invalid event from epoll for datagram fd: %" PRIx32
,
1299 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1301 (void) ioctl(fd
, SIOCINQ
, &v
);
1303 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1304 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1306 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1308 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1311 iovec
= IOVEC_MAKE(s
->buffer
, s
->buffer_size
- 1); /* Leave room for trailing NUL we add later */
1313 n
= recvmsg_safe(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1314 if (IN_SET(n
, -EINTR
, -EAGAIN
))
1317 log_warning("Got message with truncated control data (too many fds sent?), ignoring.");
1321 return log_error_errno(n
, "recvmsg() failed: %m");
1323 CMSG_FOREACH(cmsg
, &msghdr
)
1324 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1325 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1326 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
))) {
1328 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1329 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1330 cmsg
->cmsg_type
== SCM_SECURITY
) {
1332 label
= (char*) CMSG_DATA(cmsg
);
1333 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1334 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1335 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1336 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
))) {
1338 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1339 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1340 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1342 fds
= (int*) CMSG_DATA(cmsg
);
1343 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1346 /* And a trailing NUL, just in case */
1349 if (fd
== s
->syslog_fd
) {
1350 if (n
> 0 && n_fds
== 0)
1351 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1353 log_warning("Got file descriptors via syslog socket. Ignoring.");
1355 } else if (fd
== s
->native_fd
) {
1356 if (n
> 0 && n_fds
== 0)
1357 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1358 else if (n
== 0 && n_fds
== 1)
1359 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1361 log_warning("Got too many file descriptors via native socket. Ignoring.");
1364 assert(fd
== s
->audit_fd
);
1366 if (n
> 0 && n_fds
== 0)
1367 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1369 log_warning("Got file descriptors via audit socket. Ignoring.");
1372 close_many(fds
, n_fds
);
1374 server_refresh_idle_timer(s
);
/* Performs a complete flush: calls server_flush_to_var() (result ignored), runs server_vacuum(),
 * emits the space usage message and refreshes the idle timer. */
1378 static void server_full_flush(Server
*s
) {
1381 (void) server_flush_to_var(s
, false);
1383 server_vacuum(s
, false);
1385 server_space_usage_message(s
, NULL
);
1387 server_refresh_idle_timer(s
);
/* SIGUSR1 handler (registered in setup_signals()). userdata is the Server.
 *
 * Two outcomes are visible: an error log stating that flushing runtime journals is not supported for
 * namespaced instances, and an info log followed by server_full_flush(). The condition selecting
 * between them is not visible in this excerpt; presumably it tests s->namespace (TODO confirm).
 * Both messages include the sending PID taken from si->ssi_pid. */
1390 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1391 Server
*s
= userdata
;
1396 log_error("Received SIGUSR1 signal from PID " PID_FMT
", but flushing runtime journals not supported for namespaced instances.", si
->ssi_pid
);
1400 log_info("Received SIGUSR1 signal from PID " PID_FMT
", as request to flush runtime journal.", si
->ssi_pid
);
1401 server_full_flush(s
);
/* Rotation entry point shared by the SIGUSR2 handler and the Varlink Rotate method.
 *
 * The visible part runs server_vacuum(s, true), calls patch_min_use() for each storage whose journal
 * is currently open, and then writes the current CLOCK_MONOTONIC time to the "rotated" file in the
 * runtime directory so that clients can tell when the last rotation happened. A failure to write
 * that file is only logged as a warning. */
1406 static void server_full_rotate(Server
*s
) {
1413 server_vacuum(s
, true);
1415 if (s
->system_journal
)
1416 patch_min_use(&s
->system_storage
);
1417 if (s
->runtime_journal
)
1418 patch_min_use(&s
->runtime_storage
);
1420 /* Let clients know when the most recent rotation happened. */
1421 fn
= strjoina(s
->runtime_directory
, "/rotated");
1422 r
= write_timestamp_file_atomic(fn
, now(CLOCK_MONOTONIC
));
1424 log_warning_errno(r
, "Failed to write %s, ignoring: %m", fn
);
/* SIGUSR2 handler (registered in setup_signals()): logs the sending PID from si->ssi_pid and runs
 * server_full_rotate() on the Server passed as userdata. */
1427 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1428 Server
*s
= userdata
;
1432 log_info("Received SIGUSR2 signal from PID " PID_FMT
", as request to rotate journal.", si
->ssi_pid
);
1433 server_full_rotate(s
);
/* Termination handler, registered in setup_signals() for both SIGTERM and SIGINT: logs the received
 * signal at LOG_INFO level and asks the event loop to exit with code 0. */
1438 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1439 Server
*s
= userdata
;
1443 log_received_signal(LOG_INFO
, si
);
1445 sd_event_exit(s
->event
, 0);
/* Sync entry point used by the SIGRTMIN+1 handler and by the deferred half of the Varlink
 * Synchronize method.
 *
 * The visible part writes the current CLOCK_MONOTONIC time to the "synced" file in the runtime
 * directory so that clients can observe when the last sync happened; failing to write it is only
 * logged as a warning. Whatever precedes that (not visible in this excerpt) presumably performs the
 * actual sync (TODO confirm). */
1449 static void server_full_sync(Server
*s
) {
1457 /* Let clients know when the most recent sync happened. */
1458 fn
= strjoina(s
->runtime_directory
, "/synced");
1459 r
= write_timestamp_file_atomic(fn
, now(CLOCK_MONOTONIC
));
1461 log_warning_errno(r
, "Failed to write %s, ignoring: %m", fn
);
/* Handler for SIGRTMIN+1 (registered in setup_signals()): logs the sending PID at debug level and
 * runs server_full_sync() on the Server passed as userdata. */
1466 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1467 Server
*s
= userdata
;
1471 log_debug("Received SIGRTMIN1 signal from PID " PID_FMT
", as request to sync.", si
->ssi_pid
);
1472 server_full_sync(s
);
/* Hooks signal handling into the event loop.
 *
 * First adjusts the signal mask through sigprocmask_many(SIG_SETMASK, ...) for SIGINT, SIGTERM,
 * SIGUSR1, SIGUSR2 and SIGRTMIN+1 (presumably blocking them so they can be consumed by the event
 * loop; TODO confirm). Then registers:
 *   SIGUSR1      -> dispatch_sigusr1
 *   SIGUSR2      -> dispatch_sigusr2
 *   SIGTERM      -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGINT       -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGRTMIN+1   -> dispatch_sigrtmin1, priority SD_EVENT_PRIORITY_NORMAL+15
 * The checks on each returned r are not visible in this excerpt. */
1477 static int setup_signals(Server
*s
) {
1482 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1484 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1488 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1492 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1496 /* Let's process SIGTERM late, so that we flush all queued messages to disk before we exit */
1497 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1501 /* When journald is invoked on the terminal (when debugging), it's useful if C-c is handled
1502 * equivalent to SIGTERM. */
1503 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1507 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1511 /* SIGRTMIN+1 causes an immediate sync. We process this very late, so that everything else queued at
1512 * this point is really written to disk. Clients can watch /run/systemd/journal/synced with inotify
1513 * until its mtime changes to see when a sync happened. */
1514 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1518 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback for proc_cmdline_parse() (see server_init()), applying kernel command line
 * switches to the Server.
 *
 * Recognized keys:
 *   systemd.journald.forward_to_{syslog,kmsg,console,wall}
 *       parsed with parse_boolean(); a key given without a value counts as true.
 *   systemd.journald.max_level_{console,store,syslog,kmsg,wall}
 *       checked with proc_cmdline_value_missing(), then parsed with log_level_from_string().
 * For each key a parse-failure warning and an assignment of r to the matching Server field are
 * visible; the condition choosing between the two is not visible in this excerpt.
 * Any other key starting with "systemd.journald" is reported as unknown and ignored. */
1525 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1531 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1533 r
= value
? parse_boolean(value
) : true;
1535 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1537 s
->forward_to_syslog
= r
;
1539 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1541 r
= value
? parse_boolean(value
) : true;
1543 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1545 s
->forward_to_kmsg
= r
;
1547 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1549 r
= value
? parse_boolean(value
) : true;
1551 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1553 s
->forward_to_console
= r
;
1555 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1557 r
= value
? parse_boolean(value
) : true;
1559 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1561 s
->forward_to_wall
= r
;
1563 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1565 if (proc_cmdline_value_missing(key
, value
))
1568 r
= log_level_from_string(value
);
1570 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1572 s
->max_level_console
= r
;
1574 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1576 if (proc_cmdline_value_missing(key
, value
))
1579 r
= log_level_from_string(value
);
1581 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1583 s
->max_level_store
= r
;
1585 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1587 if (proc_cmdline_value_missing(key
, value
))
1590 r
= log_level_from_string(value
);
1592 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1594 s
->max_level_syslog
= r
;
1596 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1598 if (proc_cmdline_value_missing(key
, value
))
1601 r
= log_level_from_string(value
);
1603 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1605 s
->max_level_kmsg
= r
;
1607 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1609 if (proc_cmdline_value_missing(key
, value
))
1612 r
= log_level_from_string(value
);
1614 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1616 s
->max_level_wall
= r
;
1618 } else if (startswith(key
, "systemd.journald"))
1619 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1621 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server.
 *
 * For a namespaced instance the path PKGSYSCONFDIR "/journald@<namespace>.conf" is built and, per the
 * comment in the body, only that file is loaded. Otherwise PKGSYSCONFDIR "/journald.conf" plus the
 * drop-ins found under "systemd/journald.conf.d" are parsed through config_parse_many_nulstr(), whose
 * result is returned. Both paths use journald_gperf_lookup with CONFIG_PARSE_WARN. */
1625 static int server_parse_config_file(Server
*s
) {
1631 const char *namespaced
;
1633 /* If we are running in namespace mode, load the namespace specific configuration file, and nothing else */
1634 namespaced
= strjoina(PKGSYSCONFDIR
"/journald@", s
->namespace, ".conf");
1640 config_item_perf_lookup
, journald_gperf_lookup
,
1641 CONFIG_PARSE_WARN
, s
);
1648 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1649 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1651 config_item_perf_lookup
, journald_gperf_lookup
,
1652 CONFIG_PARSE_WARN
, s
);
/* Timer callback that server_schedule_sync() registers through sd_event_add_time(). userdata is the
 * Server. The rest of the body is not visible in this excerpt; presumably it performs the sync
 * (TODO confirm). */
1655 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1656 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk after a message of the given priority.
 *
 * Messages at LOG_CRIT or more severe are synced immediately (see the comment in the body). A check
 * of s->sync_scheduled follows; presumably nothing more is done when a sync is already pending
 * (TODO confirm, the guarded statement is not visible here). If sync_interval_usec is non-zero, the
 * sync timer (callback server_dispatch_sync) is set to the event loop's CLOCK_MONOTONIC time plus
 * that interval; the timer source is created on first use, given priority
 * SD_EVENT_PRIORITY_IMPORTANT, and enabled as SD_EVENT_ONESHOT. sync_scheduled is then set. */
1664 int server_schedule_sync(Server
*s
, int priority
) {
1669 if (priority
<= LOG_CRIT
) {
1670 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1675 if (s
->sync_scheduled
)
1678 if (s
->sync_interval_usec
> 0) {
1681 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1685 when
+= s
->sync_interval_usec
;
1687 if (!s
->sync_event_source
) {
1688 r
= sd_event_add_time(
1690 &s
->sync_event_source
,
1693 server_dispatch_sync
, s
);
1697 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1699 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1703 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1708 s
->sync_scheduled
= true;
/* I/O callback attached to the hostname fd by server_open_hostname(): re-reads the hostname into the
 * Server cache via server_cache_hostname(). */
1714 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1715 Server
*s
= userdata
;
1719 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and watches it for changes.
 *
 * The file is opened read-only with O_CLOEXEC|O_NONBLOCK|O_NOCTTY; failure is returned as an error.
 * The fd is then registered with the event loop with an event mask of 0 and dispatch_hostname_change
 * as callback. Two outcomes of a failed registration are visible: a warning followed by closing the
 * fd, and a hard error return. The truncated comment in the body indicates that the tolerant path is
 * for kernels prior to 3.2, which cannot poll this file; the exact condition is not visible here.
 * On success the source gets priority SD_EVENT_PRIORITY_IMPORTANT-10. */
1723 static int server_open_hostname(Server
*s
) {
1728 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1729 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1730 if (s
->hostname_fd
< 0)
1731 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1733 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1735 /* kernels prior to 3.2 don't support polling this file. Ignore
1738 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1739 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1743 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1746 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1748 return log_error_errno(r
, "Failed to adjust priority of hostname event source: %m");
/* I/O callback for the notification socket (registered for EPOLLOUT in server_connect_notify()).
 *
 * Each invocation sends at most one message with send(..., MSG_DONTWAIT), in this order of
 * precedence: the initial ready/status message if sent_notify_ready is still false, a watchdog
 * message if send_watchdog is set, otherwise one queued stdout stream notification. After a
 * successful ready or watchdog send the corresponding flag is updated. An errno of EAGAIN is tested
 * separately from other send errors, which are returned as errors; the statement guarded by the
 * EAGAIN test is not visible in this excerpt.
 *
 * If a watchdog message or stream notifications are still pending the source stays enabled (per the
 * comment in the body); otherwise it is switched to SD_EVENT_OFF. */
1753 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1754 Server
*s
= userdata
;
1758 assert(s
->notify_event_source
== es
);
1759 assert(s
->notify_fd
== fd
);
1761 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1762 * message on it. Either it's the watchdog event, the initial
1763 * READY=1 event or an stdout stream event. If there's nothing
1764 * to write anymore, turn our event source off. The next time
1765 * there's something to send it will be turned on again. */
1767 if (!s
->sent_notify_ready
) {
1768 static const char p
[] =
1770 "STATUS=Processing requests...";
1773 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1775 if (errno
== EAGAIN
)
1778 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1781 s
->sent_notify_ready
= true;
1782 log_debug("Sent READY=1 notification.");
1784 } else if (s
->send_watchdog
) {
1786 static const char p
[] =
1791 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1793 if (errno
== EAGAIN
)
1796 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1799 s
->send_watchdog
= false;
1800 log_debug("Sent WATCHDOG=1 notification.");
1802 } else if (s
->stdout_streams_notify_queue
)
1803 /* Dispatch one stream notification event */
1804 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1806 /* Leave us enabled if there's still more to do. */
1807 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1810 /* There was nothing to do anymore, let's turn ourselves off. */
1811 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1813 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback for the watchdog event source (set up in server_connect_notify()).
 *
 * Marks a watchdog message as pending (send_watchdog) and switches the notify event source on, so
 * that dispatch_notify_event() sends it; failure to switch it on is only logged as a warning. Then
 * re-arms the watchdog timer at usec + watchdog_usec/2 and enables it again; failures of those two
 * steps are returned as errors. */
1818 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1819 Server
*s
= userdata
;
1824 s
->send_watchdog
= true;
1826 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1828 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1830 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1832 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1834 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1836 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Sets up the non-blocking connection to the service manager notification socket.
 *
 * Requires that no notify fd or notify event source exists yet (asserted). Reads the socket path
 * from $NOTIFY_SOCKET, converts it with sockaddr_un_set_path(), creates an AF_UNIX datagram socket
 * with SOCK_CLOEXEC|SOCK_NONBLOCK, tries to enlarge its send buffer to NOTIFY_SNDBUF_SIZE, connects
 * it, and registers it for EPOLLOUT with dispatch_notify_event as callback. If sd_watchdog_enabled()
 * reports a watchdog, send_watchdog is set and a CLOCK_MONOTONIC timer (dispatch_watchdog) is added
 * at now + watchdog_usec/2 with an accuracy of watchdog_usec/4.
 *
 * The long comment in the body explains why the asynchronous approach is used instead of
 * sd_notify(): to avoid a blocking cycle between PID 1, dbus-daemon and journald. */
1841 static int server_connect_notify(Server
*s
) {
1842 union sockaddr_union sa
;
1848 assert(s
->notify_fd
< 0);
1849 assert(!s
->notify_event_source
);
1852 * So here's the problem: we'd like to send notification messages to PID 1, but we cannot do that via
1853 * sd_notify(), since that's synchronous, and we might end up blocking on it. Specifically: given
1854 * that PID 1 might block on dbus-daemon during IPC, and dbus-daemon is logging to us, and might
1855 * hence block on us, we might end up in a deadlock if we block on sending PID 1 notification
1856 * messages — by generating a full blocking circle. To avoid this, let's create a non-blocking
1857 * socket, and connect it to the notification socket, and then wait for POLLOUT before we send
1858 * anything. This should efficiently avoid any deadlocks, as we'll never block on PID 1, hence PID 1
1859 * can safely block on dbus-daemon which can safely block on us again.
1861 * Don't think that this issue is real? It is, see: https://github.com/systemd/systemd/issues/1505
1864 e
= getenv("NOTIFY_SOCKET");
1868 r
= sockaddr_un_set_path(&sa
.un
, e
);
1870 return log_error_errno(r
, "NOTIFY_SOCKET set to invalid value '%s': %m", e
);
1873 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1874 if (s
->notify_fd
< 0)
1875 return log_error_errno(errno
, "Failed to create notify socket: %m");
1877 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1879 r
= connect(s
->notify_fd
, &sa
.sa
, sa_len
);
1881 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1883 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1885 return log_error_errno(r
, "Failed to watch notification socket: %m");
1887 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1888 s
->send_watchdog
= true;
1890 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1892 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1895 /* This should fire pretty soon, which we'll use to send the READY=1 event. */
/* Deferred event callback that completes the Varlink Synchronize method.
 *
 * userdata is the Varlink connection; the Server is obtained from it via varlink_get_userdata().
 * Runs server_full_sync(), then marks the event source as non-floating so that it is destroyed once
 * this handler returns (see the comment in the body), and finally sends the empty method reply with
 * varlink_reply(link, NULL), whose result is returned. */
1900 static int synchronize_second_half(sd_event_source
*event_source
, void *userdata
) {
1901 Varlink
*link
= userdata
;
1906 assert_se(s
= varlink_get_userdata(link
));
1908 /* This is the "second half" of the Synchronize() varlink method. This function is called as deferred
1909 * event source at a low priority to ensure the synchronization completes after all queued log
1910 * messages are processed. */
1911 server_full_sync(s
);
1913 /* Let's get rid of the event source now, by marking it as non-floating again. It then has no ref
1914 * anymore and is immediately destroyed after we return from this function, i.e. from this event
1915 * source handler at the end. */
1916 r
= sd_event_source_set_floating(event_source
, false);
1918 return log_error_errno(r
, "Failed to mark event source as non-floating: %m");
1920 return varlink_reply(link
, NULL
);
/* Destroy callback of the deferred sync event source: userdata is the Varlink connection that
 * vl_method_synchronize() took a reference on; that reference is released here. */
1923 static void synchronize_destroy(void *userdata
) {
1924 varlink_unref(userdata
);
1927 static int vl_method_synchronize(Varlink
*link
, JsonVariant
*parameters
, VarlinkMethodFlags flags
, void *userdata
) {
1928 _cleanup_(sd_event_source_unrefp
) sd_event_source
*event_source
= NULL
;
1929 Server
*s
= userdata
;
1935 if (json_variant_elements(parameters
) > 0)
1936 return varlink_error_invalid_parameter(link
, parameters
);
1938 log_info("Received client request to rotate journal.");
1940 /* We don't do the main work now, but instead enqueue a deferred event loop job which will do
1941 * it. That job is scheduled at low priority, so that we return from this method call only after all
1942 * queued but not processed log messages are written to disk, so that this method call returning can
1943 * be used as nice synchronization point. */
1944 r
= sd_event_add_defer(s
->event
, &event_source
, synchronize_second_half
, link
);
1946 return log_error_errno(r
, "Failed to allocate defer event source: %m");
1948 r
= sd_event_source_set_destroy_callback(event_source
, synchronize_destroy
);
1950 return log_error_errno(r
, "Failed to set event source destroy callback: %m");
1952 varlink_ref(link
); /* The varlink object is now left to the destroy callback to unref */
1954 r
= sd_event_source_set_priority(event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1956 return log_error_errno(r
, "Failed to set defer event source priority: %m");
1958 /* Give up ownership of this event source. It will now be destroyed along with event loop itself,
1959 * unless it destroys itself earlier. */
1960 r
= sd_event_source_set_floating(event_source
, true);
1962 return log_error_errno(r
, "Failed to mark event source as floating: %m");
1964 (void) sd_event_source_set_description(event_source
, "deferred-sync");
/* Varlink handler for io.systemd.Journal.Rotate: rejects calls that carry parameters, otherwise logs
 * the request, runs server_full_rotate() and sends an empty reply. */
1969 static int vl_method_rotate(Varlink
*link
, JsonVariant
*parameters
, VarlinkMethodFlags flags
, void *userdata
) {
1970 Server
*s
= userdata
;
1975 if (json_variant_elements(parameters
) > 0)
1976 return varlink_error_invalid_parameter(link
, parameters
);
1978 log_info("Received client request to rotate journal.");
1979 server_full_rotate(s
);
1981 return varlink_reply(link
, NULL
);
/* Varlink handler for io.systemd.Journal.FlushToVar.
 *
 * Rejects calls that carry parameters. An io.systemd.Journal.NotSupportedByNamespaces error reply is
 * also visible; its guarding condition is not part of this excerpt, presumably a test of
 * s->namespace (TODO confirm). Otherwise logs the request, runs server_full_flush() and sends an
 * empty reply. */
1984 static int vl_method_flush_to_var(Varlink
*link
, JsonVariant
*parameters
, VarlinkMethodFlags flags
, void *userdata
) {
1985 Server
*s
= userdata
;
1990 if (json_variant_elements(parameters
) > 0)
1991 return varlink_error_invalid_parameter(link
, parameters
);
1993 return varlink_error(link
, "io.systemd.Journal.NotSupportedByNamespaces", NULL
);
1995 log_info("Received client request to flush runtime journal.");
1996 server_full_flush(s
);
1998 return varlink_reply(link
, NULL
);
/* Varlink handler for io.systemd.Journal.RelinquishVar.
 *
 * Rejects calls that carry parameters. An io.systemd.Journal.NotSupportedByNamespaces error reply is
 * also visible; its guarding condition is not part of this excerpt, presumably a test of
 * s->namespace (TODO confirm). Otherwise logs the request (naming the system storage path), runs
 * server_relinquish_var() without checking its result, and sends an empty reply. */
2001 static int vl_method_relinquish_var(Varlink
*link
, JsonVariant
*parameters
, VarlinkMethodFlags flags
, void *userdata
) {
2002 Server
*s
= userdata
;
2007 if (json_variant_elements(parameters
) > 0)
2008 return varlink_error_invalid_parameter(link
, parameters
);
2010 return varlink_error(link
, "io.systemd.Journal.NotSupportedByNamespaces", NULL
);
2012 log_info("Received client request to relinquish %s access.", s
->system_storage
.path
);
2013 server_relinquish_var(s
);
2015 return varlink_reply(link
, NULL
);
/* Connect callback of the Varlink server (bound in server_open_varlink()): re-evaluates the idle
 * timer, since a new client may mean the server is no longer idle. */
2018 static int vl_connect(VarlinkServer
*server
, Varlink
*link
, void *userdata
) {
2019 Server
*s
= userdata
;
2025 (void) server_start_or_stop_idle_timer(s
); /* maybe we are no longer idle */
/* Disconnect callback of the Varlink server (bound in server_open_varlink()): re-evaluates the idle
 * timer, since losing a client may have made the server idle. */
2030 static void vl_disconnect(VarlinkServer
*server
, Varlink
*link
, void *userdata
) {
2031 Server
*s
= userdata
;
2037 (void) server_start_or_stop_idle_timer(s
); /* maybe we are idle now */
/* Creates and wires up the Varlink server.
 *
 * Allocates the server with VARLINK_SERVER_ROOT_ONLY, stores the Server as its userdata, binds the
 * methods Synchronize, Rotate, FlushToVar and RelinquishVar of io.systemd.Journal plus the
 * connect/disconnect callbacks, and attaches everything to the event loop at
 * SD_EVENT_PRIORITY_NORMAL. Both a listen-on-address call (mode 0600, path given by socket) and a
 * listen-on-fd call are visible; presumably the passed fd is used when valid and the address
 * otherwise (TODO confirm, the selecting condition is not visible here). */
2040 static int server_open_varlink(Server
*s
, const char *socket
, int fd
) {
2045 r
= varlink_server_new(&s
->varlink_server
, VARLINK_SERVER_ROOT_ONLY
);
2049 varlink_server_set_userdata(s
->varlink_server
, s
);
2051 r
= varlink_server_bind_method_many(
2053 "io.systemd.Journal.Synchronize", vl_method_synchronize
,
2054 "io.systemd.Journal.Rotate", vl_method_rotate
,
2055 "io.systemd.Journal.FlushToVar", vl_method_flush_to_var
,
2056 "io.systemd.Journal.RelinquishVar", vl_method_relinquish_var
);
2060 r
= varlink_server_bind_connect(s
->varlink_server
, vl_connect
);
2064 r
= varlink_server_bind_disconnect(s
->varlink_server
, vl_disconnect
);
2069 r
= varlink_server_listen_address(s
->varlink_server
, socket
, 0600);
2071 r
= varlink_server_listen_fd(s
->varlink_server
, fd
);
2075 r
= varlink_server_attach_event(s
->varlink_server
, s
->event
, SD_EVENT_PRIORITY_NORMAL
);
/* Decides whether the server may shut itself down for being idle.
 *
 * According to the comments in the body it is never idle when it serves the main namespace, when
 * max_retention_usec exceeds IDLE_TIMEOUT_USEC (the process must keep running to enforce retention),
 * when a Varlink client is connected, or when stdout streams exist. The return statements themselves
 * are not visible in this excerpt. */
2082 static bool server_is_idle(Server
*s
) {
2085 /* The server for the main namespace is never idle */
2089 /* If a retention maximum is set larger than the idle time we need to be running to enforce it, hence
2090 * turn off the idle logic. */
2091 if (s
->max_retention_usec
> IDLE_TIMEOUT_USEC
)
2094 /* We aren't idle if we have a varlink client */
2095 if (varlink_server_current_connections(s
->varlink_server
) > 0)
2098 /* If we have stdout streams we aren't idle */
2099 if (s
->n_stdout_streams
> 0)
/* Callback of the idle timer armed by server_start_or_stop_idle_timer(): logs that the server is idle
 * and asks the event loop to exit with code 0. */
2105 static int server_idle_handler(sd_event_source
*source
, uint64_t usec
, void *userdata
) {
2106 Server
*s
= userdata
;
2111 log_debug("Server is idle, exiting.");
2112 sd_event_exit(s
->event
, 0);
/* Brings the idle timer in line with the current idle state.
 *
 * If server_is_idle() says the server is busy, an existing idle timer is disabled and released.
 * If a timer already exists nothing new is created (the guarded statement is not visible here,
 * presumably an early return; TODO confirm). Otherwise a CLOCK_MONOTONIC timer firing
 * IDLE_TIMEOUT_USEC after the event loop's current time is added with server_idle_handler as
 * callback, given priority SD_EVENT_PRIORITY_IDLE and the description "idle-timer", and stored in
 * s->idle_event_source. Setup failures are logged and returned as errors. */
2116 int server_start_or_stop_idle_timer(Server
*s
) {
2117 _cleanup_(sd_event_source_unrefp
) sd_event_source
*source
= NULL
;
2123 if (!server_is_idle(s
)) {
2124 s
->idle_event_source
= sd_event_source_disable_unref(s
->idle_event_source
);
2128 if (s
->idle_event_source
)
2131 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
2133 return log_error_errno(r
, "Failed to determine current time: %m");
2135 r
= sd_event_add_time(s
->event
, &source
, CLOCK_MONOTONIC
, usec_add(when
, IDLE_TIMEOUT_USEC
), 0, server_idle_handler
, s
);
2137 return log_error_errno(r
, "Failed to allocate idle timer: %m");
2139 r
= sd_event_source_set_priority(source
, SD_EVENT_PRIORITY_IDLE
);
2141 return log_error_errno(r
, "Failed to set idle timer priority: %m");
2143 (void) sd_event_source_set_description(source
, "idle-timer");
/* Hand the fully configured source over to the Server; the cleanup handler must not free it. */
2145 s
->idle_event_source
= TAKE_PTR(source
);
/* Pushes the idle timeout back after activity.
 *
 * Tests whether an idle timer exists (the guarded statement is not visible here; presumably the
 * function returns right away when there is none, TODO confirm). Otherwise the timer is moved to
 * the event loop's current CLOCK_MONOTONIC time plus IDLE_TIMEOUT_USEC. Failures are logged and
 * returned as errors. */
2149 int server_refresh_idle_timer(Server
*s
) {
2155 if (!s
->idle_event_source
)
2158 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
2160 return log_error_errno(r
, "Failed to determine current time: %m");
2162 r
= sd_event_source_set_time(s
->idle_event_source
, usec_add(when
, IDLE_TIMEOUT_USEC
));
2164 return log_error_errno(r
, "Failed to refresh idle timer: %m");
/* Records the journal namespace this instance serves.
 *
 * A name rejected by log_namespace_name_valid() results in an EINVAL error. Otherwise the name is
 * duplicated into s->namespace and the field string "_NAMESPACE=<name>" is built into
 * s->namespace_field; the allocation of the latter is checked (the guarded statement is not
 * visible here). How a NULL namespace is treated is not visible in this excerpt. */
2169 static int set_namespace(Server
*s
, const char *namespace) {
2175 if (!log_namespace_name_valid(namespace))
2176 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Specified namespace name not valid, refusing: %s", namespace);
2178 s
->namespace = strdup(namespace);
2182 s
->namespace_field
= strjoin("_NAMESPACE=", namespace);
2183 if (!s
->namespace_field
)
/* Initializes the Server for the given namespace (main instance or a named namespace).
 *
 * Visible steps, in order:
 *  - fill in defaults: compression, watchdog, sync interval, rate limit, wall forwarding, maximum
 *    file age, maximum log levels, line_max, storage names;
 *  - apply the namespace via set_namespace(); namespaced instances do not read /dev/kmsg by default
 *    and default to STORAGE_PERSISTENT instead of STORAGE_AUTO;
 *  - parse the configuration file and, for the main instance only, the kernel command line;
 *  - force the rate limit interval and burst to 0,0 if exactly one of them is 0;
 *  - pick the runtime directory: $RUNTIME_DIRECTORY, else /run/systemd/journal.<namespace>, else
 *    /run/systemd/journal, and create it with mode 0755;
 *  - allocate the user journal map, mmap cache, deferred-close set and the default event loop;
 *  - sort the fds received through sd_listen_fds() into native, stdout, syslog, varlink and audit
 *    sockets (more than one of a kind is an EINVAL error); other fds are collected in an FDSet,
 *    offered to server_restore_streams(), and any leftovers are closed with a warning;
 *  - open the stdout, syslog and native sockets, /dev/kmsg, audit (only if an audit fd was passed
 *    or no sockets were passed at all), Varlink, the kernel sequence number and the hostname
 *    watch, then set up signals and the rate limiter;
 *  - cache the cgroup root, hostname, boot id and machine id;
 *  - derive the runtime storage path under /run/log/journal/ and the system storage path from
 *    $LOGS_DIRECTORY or /var/log/journal/, each with a ".<namespace>" suffix when namespaced;
 *  - connect the notification socket, acquire the default client context, open the system journal
 *    and start or stop the idle timer.
 * The checks on most intermediate results are not visible in this excerpt. */
2189 int server_init(Server
*s
, const char *namespace) {
2190 const char *native_socket
, *syslog_socket
, *stdout_socket
, *varlink_socket
, *e
;
2191 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
2192 int n
, r
, fd
, varlink_fd
= -1;
2206 .compress
.enabled
= true,
2207 .compress
.threshold_bytes
= (uint64_t) -1,
2212 .watchdog_usec
= USEC_INFINITY
,
2214 .sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
,
2215 .sync_scheduled
= false,
2217 .ratelimit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
,
2218 .ratelimit_burst
= DEFAULT_RATE_LIMIT_BURST
,
2220 .forward_to_wall
= true,
2222 .max_file_usec
= DEFAULT_MAX_FILE_USEC
,
2224 .max_level_store
= LOG_DEBUG
,
2225 .max_level_syslog
= LOG_DEBUG
,
2226 .max_level_kmsg
= LOG_NOTICE
,
2227 .max_level_console
= LOG_INFO
,
2228 .max_level_wall
= LOG_EMERG
,
2230 .line_max
= DEFAULT_LINE_MAX
,
2232 .runtime_storage
.name
= "Runtime Journal",
2233 .system_storage
.name
= "System Journal",
2236 r
= set_namespace(s
, namespace);
2240 /* By default, only read from /dev/kmsg if we are the main namespace */
2241 s
->read_kmsg
= !s
->namespace;
2242 s
->storage
= s
->namespace ? STORAGE_PERSISTENT
: STORAGE_AUTO
;
2244 journal_reset_metrics(&s
->system_storage
.metrics
);
2245 journal_reset_metrics(&s
->runtime_storage
.metrics
);
2247 server_parse_config_file(s
);
2249 if (!s
->namespace) {
2250 /* Parse kernel command line, but only if we are not a namespace instance */
2251 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
2253 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
2256 if (!!s
->ratelimit_interval
!= !!s
->ratelimit_burst
) { /* One set to 0 and the other not? */
2257 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
2258 s
->ratelimit_interval
, s
->ratelimit_burst
);
2259 s
->ratelimit_interval
= s
->ratelimit_burst
= 0;
2262 e
= getenv("RUNTIME_DIRECTORY");
2264 s
->runtime_directory
= strdup(e
);
2265 else if (s
->namespace)
2266 s
->runtime_directory
= strjoin("/run/systemd/journal.", s
->namespace);
2268 s
->runtime_directory
= strdup("/run/systemd/journal");
2269 if (!s
->runtime_directory
)
2272 (void) mkdir_p(s
->runtime_directory
, 0755);
2274 s
->user_journals
= ordered_hashmap_new(NULL
);
2275 if (!s
->user_journals
)
2278 s
->mmap
= mmap_cache_new();
2282 s
->deferred_closes
= set_new(NULL
);
2283 if (!s
->deferred_closes
)
2286 r
= sd_event_default(&s
->event
);
2288 return log_error_errno(r
, "Failed to create event loop: %m");
2290 n
= sd_listen_fds(true);
2292 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
2294 native_socket
= strjoina(s
->runtime_directory
, "/socket");
2295 stdout_socket
= strjoina(s
->runtime_directory
, "/stdout");
2296 syslog_socket
= strjoina(s
->runtime_directory
, "/dev-log");
2297 varlink_socket
= strjoina(s
->runtime_directory
, "/io.systemd.journal");
2299 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
2301 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, native_socket
, 0) > 0) {
2303 if (s
->native_fd
>= 0)
2304 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2305 "Too many native sockets passed.");
2309 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, stdout_socket
, 0) > 0) {
2311 if (s
->stdout_fd
>= 0)
2312 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2313 "Too many stdout sockets passed.");
2317 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, syslog_socket
, 0) > 0) {
2319 if (s
->syslog_fd
>= 0)
2320 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2321 "Too many /dev/log sockets passed.");
2325 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, varlink_socket
, 0) > 0) {
2327 if (varlink_fd
>= 0)
2328 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2329 "Too many varlink sockets passed.");
2332 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
2334 if (s
->audit_fd
>= 0)
2335 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2336 "Too many audit sockets passed.");
2348 r
= fdset_put(fds
, fd
);
2354 /* Try to restore streams, but don't bother if this fails */
2355 (void) server_restore_streams(s
, fds
);
2357 if (fdset_size(fds
) > 0) {
2358 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
2359 fds
= fdset_free(fds
);
2362 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0 && varlink_fd
< 0;
2364 /* always open stdout, syslog, native, and kmsg sockets */
2366 /* systemd-journald.socket: /run/systemd/journal/stdout */
2367 r
= server_open_stdout_socket(s
, stdout_socket
);
2371 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2372 r
= server_open_syslog_socket(s
, syslog_socket
);
2376 /* systemd-journald.socket: /run/systemd/journal/socket */
2377 r
= server_open_native_socket(s
, native_socket
);
2382 r
= server_open_dev_kmsg(s
);
2386 /* Unless we got *some* sockets and not audit, open audit socket */
2387 if (s
->audit_fd
>= 0 || no_sockets
) {
2388 r
= server_open_audit(s
);
2393 r
= server_open_varlink(s
, varlink_socket
, varlink_fd
);
2397 r
= server_open_kernel_seqnum(s
);
2401 r
= server_open_hostname(s
);
2405 r
= setup_signals(s
);
2409 s
->ratelimit
= journal_ratelimit_new();
2413 r
= cg_get_root_path(&s
->cgroup_root
);
2415 return log_error_errno(r
, "Failed to acquire cgroup root path: %m");
2417 server_cache_hostname(s
);
2418 server_cache_boot_id(s
);
2419 server_cache_machine_id(s
);
2422 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
), ".", s
->namespace);
2424 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
2425 if (!s
->runtime_storage
.path
)
2428 e
= getenv("LOGS_DIRECTORY");
2430 s
->system_storage
.path
= strdup(e
);
2431 else if (s
->namespace)
2432 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
), ".", s
->namespace);
2434 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
2435 if (!s
->system_storage
.path
)
2438 (void) server_connect_notify(s
);
2440 (void) client_context_acquire_default(s
);
2442 r
= system_journal_open(s
, false, false);
2446 server_start_or_stop_idle_timer(s
);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time on the system journal
 * (if one is open) and on every user journal. Whether a tag is actually appended is decided by that
 * helper, which is not part of this excerpt. */
2450 void server_maybe_append_tags(Server
*s
) {
2456 n
= now(CLOCK_REALTIME
);
2458 if (s
->system_journal
)
2459 journal_file_maybe_append_tag(s
->system_journal
, n
);
2461 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2462 journal_file_maybe_append_tag(f
, n
);
/* Releases everything the Server owns.
 *
 * In the visible order: the namespace field string, deferred closes, all stdout streams, cached
 * client contexts, the system/runtime/user journals, the Varlink server, every event source and
 * the event loop itself, the socket and helper fds, the rate limiter, the kernel sequence number
 * mapping (one uint64_t), the remaining heap strings, and finally the mmap cache reference. */
2466 void server_done(Server
*s
) {
2470 free(s
->namespace_field
);
2472 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
/* Loop until the list head is empty; presumably stdout_stream_free() unlinks the stream (TODO confirm). */
2474 while (s
->stdout_streams
)
2475 stdout_stream_free(s
->stdout_streams
);
2477 client_context_flush_all(s
);
2479 (void) journal_file_close(s
->system_journal
);
2480 (void) journal_file_close(s
->runtime_journal
);
2482 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
2484 varlink_server_unref(s
->varlink_server
);
2486 sd_event_source_unref(s
->syslog_event_source
);
2487 sd_event_source_unref(s
->native_event_source
);
2488 sd_event_source_unref(s
->stdout_event_source
);
2489 sd_event_source_unref(s
->dev_kmsg_event_source
);
2490 sd_event_source_unref(s
->audit_event_source
);
2491 sd_event_source_unref(s
->sync_event_source
);
2492 sd_event_source_unref(s
->sigusr1_event_source
);
2493 sd_event_source_unref(s
->sigusr2_event_source
);
2494 sd_event_source_unref(s
->sigterm_event_source
);
2495 sd_event_source_unref(s
->sigint_event_source
);
2496 sd_event_source_unref(s
->sigrtmin1_event_source
);
2497 sd_event_source_unref(s
->hostname_event_source
);
2498 sd_event_source_unref(s
->notify_event_source
);
2499 sd_event_source_unref(s
->watchdog_event_source
);
2500 sd_event_source_unref(s
->idle_event_source
);
2501 sd_event_unref(s
->event
);
2503 safe_close(s
->syslog_fd
);
2504 safe_close(s
->native_fd
);
2505 safe_close(s
->stdout_fd
);
2506 safe_close(s
->dev_kmsg_fd
);
2507 safe_close(s
->audit_fd
);
2508 safe_close(s
->hostname_fd
);
2509 safe_close(s
->notify_fd
);
2512 journal_ratelimit_free(s
->ratelimit
);
2514 if (s
->kernel_seqnum
)
2515 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2519 free(s
->cgroup_root
);
2520 free(s
->hostname_field
);
2521 free(s
->runtime_storage
.path
);
2522 free(s
->system_storage
.path
);
2523 free(s
->runtime_directory
);
2525 mmap_cache_unref(s
->mmap
);
/* String names for the Storage enum values, indexed by enum value and sized by _STORAGE_MAX. */
2528 static const char* const storage_table
[_STORAGE_MAX
] = {
2529 [STORAGE_AUTO
] = "auto",
2530 [STORAGE_VOLATILE
] = "volatile",
2531 [STORAGE_PERSISTENT
] = "persistent",
2532 [STORAGE_NONE
] = "none"
/* NOTE(review): presumably expands to the string<->enum lookup helpers for Storage based on the
 * table above - confirm against the macro definition in string-table.h. */
2535 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
/* NOTE(review): presumably defines the config_parse_storage() parser, which reports the given
 * message on invalid input - confirm against the macro definition in conf-parser.h. */
2536 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names for the SplitMode enum values, indexed by enum value and sized by _SPLIT_MAX. */
2538 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2539 [SPLIT_LOGIN
] = "login",
2540 [SPLIT_UID
] = "uid",
2541 [SPLIT_NONE
] = "none",
/* NOTE(review): presumably expands to the string<->enum lookup helpers for SplitMode based on the
 * table above - confirm against the macro definition in string-table.h. */
2544 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
/* NOTE(review): presumably defines the config_parse_split_mode() parser, which reports the given
 * message on invalid input - confirm against the macro definition in conf-parser.h. */
2545 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Configuration parser for the LineMax= setting.
 *
 * Visible behaviour: an empty value resets *sz to DEFAULT_LINE_MAX. Any other value is parsed with
 * parse_size(); a parse failure is logged at LOG_ERR as "ignoring", a too-small value is logged at
 * LOG_WARNING as being clamped to 79, and a value above SSIZE_MAX-1 is logged at LOG_WARNING as
 * being clamped to SSIZE_MAX-1.
 *
 * NOTE(review): most of the parameter list, the error checks and the assignments that actually
 * store or clamp the value are not visible in this excerpt. */
2547 int config_parse_line_max(
2549 const char *filename
,
2551 const char *section
,
2552 unsigned section_line
,
2567 if (isempty(rvalue
))
2568 /* Empty assignment means default */
2569 *sz
= DEFAULT_LINE_MAX
;
/* NOTE(review): 1024 is presumably the unit base used for size suffixes - confirm against
 * parse_size(). The parsed number is stored in v. */
2573 r
= parse_size(rvalue
, 1024, &v
);
/* r is passed along so the log entry can include the parse error. */
2575 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2580 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2581 * terminal size is 80ch, and it might make sense to break one character before the natural
2582 * line break would occur on that. */
2583 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
/* Upper bound check; v is compared as uint64_t against SSIZE_MAX-1. */
2585 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2586 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2587 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2588 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2589 * fail much earlier anyway. */
2590 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2599 int config_parse_compress(
2601 const char *filename
,
2603 const char *section
,
2604 unsigned section_line
,
2611 JournalCompressOptions
* compress
= data
;
2614 if (isempty(rvalue
)) {
2615 compress
->enabled
= true;
2616 compress
->threshold_bytes
= (uint64_t) -1;
2617 } else if (streq(rvalue
, "1")) {
2618 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2619 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2620 compress
->enabled
= true;
2621 } else if (streq(rvalue
, "0")) {
2622 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2623 "Compress= ambiguously specified as 0, disabling compression");
2624 compress
->enabled
= false;
2626 r
= parse_boolean(rvalue
);
2628 r
= parse_size(rvalue
, 1024, &compress
->threshold_bytes
);
2630 log_syntax(unit
, LOG_ERR
, filename
, line
, r
,
2631 "Failed to parse Compress= value, ignoring: %s", rvalue
);
2633 compress
->enabled
= true;
2635 compress
->enabled
= r
;