1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <selinux/selinux.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
25 #include "format-util.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
59 #define USER_JOURNALS_MAX 1024
61 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
62 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
63 #define DEFAULT_RATE_LIMIT_BURST 10000
64 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
66 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
68 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
70 /* The period to insert between posting changes for coalescing */
71 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
73 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
74 * for a bit of additional metadata. */
75 #define DEFAULT_LINE_MAX (48*1024)
77 #define DEFERRED_CLOSES_MAX (4096)
79 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
80 _cleanup_closedir_
DIR *d
= NULL
;
89 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
90 errno
, "Failed to open %s: %m", path
);
92 if (fstatvfs(dirfd(d
), &ss
) < 0)
93 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
95 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
97 FOREACH_DIRENT_ALL(de
, d
, break) {
100 if (!endswith(de
->d_name
, ".journal") &&
101 !endswith(de
->d_name
, ".journal~"))
104 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
105 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
109 if (!S_ISREG(st
.st_mode
))
112 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
118 static void cache_space_invalidate(JournalStorageSpace
*space
) {
122 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
123 JournalStorageSpace
*space
;
124 JournalMetrics
*metrics
;
125 uint64_t vfs_used
, vfs_avail
, avail
;
131 metrics
= &storage
->metrics
;
132 space
= &storage
->space
;
134 ts
= now(CLOCK_MONOTONIC
);
136 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
139 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
143 space
->vfs_used
= vfs_used
;
144 space
->vfs_available
= vfs_avail
;
146 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
148 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
149 space
->available
= LESS_BY(space
->limit
, vfs_used
);
150 space
->timestamp
= ts
;
154 static void patch_min_use(JournalStorage
*storage
) {
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
163 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
166 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
172 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
174 r
= cache_space_refresh(s
, js
);
177 *available
= js
->space
.available
;
179 *limit
= js
->space
.limit
;
184 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
185 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
186 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
187 JournalMetrics
*metrics
;
192 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
194 if (cache_space_refresh(s
, storage
) < 0)
197 metrics
= &storage
->metrics
;
198 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
199 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
200 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
201 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
202 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
203 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
205 server_driver_message(s
, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
209 "JOURNAL_NAME=%s", storage
->name
,
210 "JOURNAL_PATH=%s", storage
->path
,
211 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
212 "CURRENT_USE_PRETTY=%s", fb1
,
213 "MAX_USE=%"PRIu64
, metrics
->max_use
,
214 "MAX_USE_PRETTY=%s", fb2
,
215 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
217 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
218 "DISK_AVAILABLE_PRETTY=%s", fb4
,
219 "LIMIT=%"PRIu64
, storage
->space
.limit
,
220 "LIMIT_PRETTY=%s", fb5
,
221 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
222 "AVAILABLE_PRETTY=%s", fb6
,
226 static bool uid_for_system_journal(uid_t uid
) {
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
230 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
233 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
240 if (uid_for_system_journal(uid
))
243 r
= add_acls_for_user(f
->fd
, uid
);
245 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
249 static int open_journal(
255 JournalMetrics
*metrics
,
266 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
267 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
269 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
270 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
275 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
277 (void) journal_file_close(f
);
285 static bool flushed_flag_is_set(void) {
286 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
289 static int system_journal_open(Server
*s
, bool flush_requested
) {
293 if (!s
->system_journal
&&
294 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
295 (flush_requested
|| flushed_flag_is_set())) {
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
303 if (s
->storage
== STORAGE_PERSISTENT
)
304 (void) mkdir_p("/var/log/journal/", 0755);
306 (void) mkdir(s
->system_storage
.path
, 0755);
308 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
309 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
311 server_add_acls(s
->system_journal
, 0);
312 (void) cache_space_refresh(s
, &s
->system_storage
);
313 patch_min_use(&s
->system_storage
);
315 if (!IN_SET(r
, -ENOENT
, -EROFS
))
316 log_warning_errno(r
, "Failed to open system journal: %m");
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
328 if (!flush_requested
)
329 (void) server_flush_to_var(s
, true);
332 if (!s
->runtime_journal
&&
333 (s
->storage
!= STORAGE_NONE
)) {
335 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
337 if (s
->system_journal
) {
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
343 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
346 log_warning_errno(r
, "Failed to open runtime journal: %m");
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn
, 0750);
360 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
362 return log_error_errno(r
, "Failed to open runtime journal: %m");
365 if (s
->runtime_journal
) {
366 server_add_acls(s
->runtime_journal
, 0);
367 (void) cache_space_refresh(s
, &s
->runtime_storage
);
368 patch_min_use(&s
->runtime_storage
);
375 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
376 _cleanup_free_
char *p
= NULL
;
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s
, false);
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
399 if (s
->runtime_journal
)
400 return s
->runtime_journal
;
402 if (uid_for_system_journal(uid
))
403 return s
->system_journal
;
405 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
409 r
= sd_id128_get_machine(&machine
);
411 log_debug_errno(r
, "Failed to determine machine ID, using system log: %m");
412 return s
->system_journal
;
415 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
416 SD_ID128_FORMAT_VAL(machine
), uid
) < 0) {
418 return s
->system_journal
;
421 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
422 /* Too many open? Then let's close one */
423 f
= ordered_hashmap_steal_first(s
->user_journals
);
425 (void) journal_file_close(f
);
428 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
430 return s
->system_journal
;
432 server_add_acls(f
, uid
);
434 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
436 (void) journal_file_close(f
);
437 return s
->system_journal
;
443 static int do_rotate(
456 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
459 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
461 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
464 server_add_acls(*f
, uid
);
469 static void server_process_deferred_closes(Server
*s
) {
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f
, s
->deferred_closes
, i
) {
475 if (journal_file_is_offlining(f
))
478 (void) set_remove(s
->deferred_closes
, f
);
479 (void) journal_file_close(f
);
483 static void server_vacuum_deferred_closes(Server
*s
) {
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s
->deferred_closes
) < DEFERRED_CLOSES_MAX
)
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s
);
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s
->deferred_closes
) >= DEFERRED_CLOSES_MAX
) {
497 assert_se(f
= set_steal_first(s
->deferred_closes
));
498 journal_file_close(f
);
502 static int open_user_journal_directory(Server
*s
, DIR **ret_dir
, char **ret_path
) {
503 _cleanup_closedir_
DIR *dir
= NULL
;
504 _cleanup_free_
char *path
= NULL
;
510 r
= sd_id128_get_machine(&machine
);
512 return log_error_errno(r
, "Failed to determine machine ID, ignoring: %m");
514 if (asprintf(&path
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/", SD_ID128_FORMAT_VAL(machine
)) < 0)
519 return log_error_errno(errno
, "Failed to open user journal directory '%s': %m", path
);
522 *ret_dir
= TAKE_PTR(dir
);
524 *ret_path
= TAKE_PTR(path
);
529 void server_rotate(Server
*s
) {
530 _cleanup_free_
char *path
= NULL
;
531 _cleanup_closedir_
DIR *d
= NULL
;
537 log_debug("Rotating...");
539 /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
540 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
541 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
543 /* Then, rotate all user journals we have open (keeping them open) */
544 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
545 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
547 ordered_hashmap_replace(s
->user_journals
, k
, f
);
549 /* Old file has been closed and deallocated */
550 ordered_hashmap_remove(s
->user_journals
, k
);
553 /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
554 * access to /var, i.e. are not in the log-to-runtime-journal mode). */
555 if (!s
->runtime_journal
&&
556 open_user_journal_directory(s
, &d
, &path
) >= 0) {
560 FOREACH_DIRENT(de
, d
, log_warning_errno(errno
, "Failed to enumerate %s, ignoring: %m", path
)) {
561 _cleanup_free_
char *u
= NULL
, *full
= NULL
;
562 _cleanup_close_
int fd
= -1;
566 a
= startswith(de
->d_name
, "user-");
569 b
= endswith(de
->d_name
, ".journal");
579 r
= parse_uid(u
, &uid
);
581 log_debug_errno(r
, "Failed to parse UID from file name '%s', ignoring: %m", de
->d_name
);
585 /* Already rotated in the above loop? i.e. is it an open user journal? */
586 if (ordered_hashmap_contains(s
->user_journals
, UID_TO_PTR(uid
)))
589 full
= strjoin(path
, de
->d_name
);
595 fd
= openat(dirfd(d
), de
->d_name
, O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
|O_NONBLOCK
);
597 log_full_errno(IN_SET(errno
, ELOOP
, ENOENT
) ? LOG_DEBUG
: LOG_WARNING
, errno
,
598 "Failed to open journal file '%s' for rotation: %m", full
);
602 /* Make some room in the set of deferred close()s */
603 server_vacuum_deferred_closes(s
);
605 /* Open the file briefly, so that we can archive it */
606 r
= journal_file_open(fd
,
611 s
->compress
.threshold_bytes
,
613 &s
->system_storage
.metrics
,
619 log_warning_errno(r
, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full
);
621 r
= journal_file_dispose(dirfd(d
), de
->d_name
);
623 log_warning_errno(r
, "Failed to move %s out of the way, ignoring: %m", full
);
625 log_debug("Successfully moved %s out of the way.", full
);
630 TAKE_FD(fd
); /* Donated to journal_file_open() */
632 r
= journal_file_archive(f
);
634 log_debug_errno(r
, "Failed to archive journal file '%s', ignoring: %m", full
);
636 f
= journal_initiate_close(f
, s
->deferred_closes
);
640 server_process_deferred_closes(s
);
643 void server_sync(Server
*s
) {
648 if (s
->system_journal
) {
649 r
= journal_file_set_offline(s
->system_journal
, false);
651 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
654 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
655 r
= journal_file_set_offline(f
, false);
657 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
660 if (s
->sync_event_source
) {
661 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
663 log_error_errno(r
, "Failed to disable sync timer source: %m");
666 s
->sync_scheduled
= false;
669 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
676 (void) cache_space_refresh(s
, storage
);
679 server_space_usage_message(s
, storage
);
681 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
682 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
683 &s
->oldest_file_usec
, verbose
);
684 if (r
< 0 && r
!= -ENOENT
)
685 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
687 cache_space_invalidate(&storage
->space
);
690 int server_vacuum(Server
*s
, bool verbose
) {
693 log_debug("Vacuuming...");
695 s
->oldest_file_usec
= 0;
697 if (s
->system_journal
)
698 do_vacuum(s
, &s
->system_storage
, verbose
);
699 if (s
->runtime_journal
)
700 do_vacuum(s
, &s
->runtime_storage
, verbose
);
705 static void server_cache_machine_id(Server
*s
) {
711 r
= sd_id128_get_machine(&id
);
715 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
718 static void server_cache_boot_id(Server
*s
) {
724 r
= sd_id128_get_boot(&id
);
728 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
731 static void server_cache_hostname(Server
*s
) {
732 _cleanup_free_
char *t
= NULL
;
737 t
= gethostname_malloc();
741 x
= strappend("_HOSTNAME=", t
);
745 free(s
->hostname_field
);
746 s
->hostname_field
= x
;
749 static bool shall_try_append_again(JournalFile
*f
, int r
) {
752 case -E2BIG
: /* Hit configured limit */
753 case -EFBIG
: /* Hit fs limit */
754 case -EDQUOT
: /* Quota limit hit */
755 case -ENOSPC
: /* Disk full */
756 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
759 case -EIO
: /* I/O error of some kind (mmap) */
760 log_warning("%s: IO error, rotating.", f
->path
);
763 case -EHOSTDOWN
: /* Other machine */
764 log_info("%s: Journal file from other machine, rotating.", f
->path
);
767 case -EBUSY
: /* Unclean shutdown */
768 log_info("%s: Unclean shutdown, rotating.", f
->path
);
771 case -EPROTONOSUPPORT
: /* Unsupported feature */
772 log_info("%s: Unsupported feature, rotating.", f
->path
);
775 case -EBADMSG
: /* Corrupted */
776 case -ENODATA
: /* Truncated */
777 case -ESHUTDOWN
: /* Already archived */
778 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
781 case -EIDRM
: /* Journal file has been deleted */
782 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
785 case -ETXTBSY
: /* Journal file is from the future */
786 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
794 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
795 bool vacuumed
= false, rotate
= false;
796 struct dual_timestamp ts
;
804 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
805 * the source time, and not even the time the event was originally seen, but instead simply the time we started
806 * processing it, as we want strictly linear ordering in what we write out.) */
807 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
808 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
810 if (ts
.realtime
< s
->last_realtime_clock
) {
811 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
812 * regular operation. However, when it does happen, then we should make sure that we start fresh files
813 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
814 * bisection works correctly. */
816 log_debug("Time jumped backwards, rotating.");
820 f
= find_journal(s
, uid
);
824 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
825 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
832 server_vacuum(s
, false);
835 f
= find_journal(s
, uid
);
840 s
->last_realtime_clock
= ts
.realtime
;
842 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
844 server_schedule_sync(s
, priority
);
848 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
849 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
854 server_vacuum(s
, false);
856 f
= find_journal(s
, uid
);
860 log_debug("Retrying write.");
861 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
863 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
865 server_schedule_sync(s
, priority
);
868 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
869 if (isset(value)) { \
871 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
872 sprintf(k, field "=" format, value); \
873 iovec[n++] = IOVEC_MAKE_STRING(k); \
876 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
877 if (!isempty(value)) { \
879 k = strjoina(field "=", value); \
880 iovec[n++] = IOVEC_MAKE_STRING(k); \
883 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
884 if (!sd_id128_is_null(value)) { \
886 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
887 sd_id128_to_string(value, stpcpy(k, field "=")); \
888 iovec[n++] = IOVEC_MAKE_STRING(k); \
891 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
892 if (value_size > 0) { \
894 k = newa(char, STRLEN(field "=") + value_size + 1); \
895 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
896 iovec[n++] = IOVEC_MAKE_STRING(k); \
899 static void dispatch_message_real(
901 struct iovec
*iovec
, size_t n
, size_t m
,
902 const ClientContext
*c
,
903 const struct timeval
*tv
,
907 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
908 _cleanup_free_
char *cmdline1
= NULL
, *cmdline2
= NULL
;
916 N_IOVEC_META_FIELDS
+
917 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
918 client_context_extra_fields_n_iovec(c
) <= m
);
921 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
922 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
923 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
925 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
926 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
929 /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
930 * Let's use a heap allocation for this one. */
931 cmdline1
= set_iovec_string_field(iovec
, &n
, "_CMDLINE=", c
->cmdline
);
933 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
934 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
935 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
936 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
938 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP"); /* A path */
939 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
940 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
941 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
942 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
943 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
944 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
946 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
948 if (c
->extra_fields_n_iovec
> 0) {
949 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
950 n
+= c
->extra_fields_n_iovec
;
956 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
958 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
959 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
960 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
962 /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
963 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
964 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
966 cmdline2
= set_iovec_string_field(iovec
, &n
, "OBJECT_CMDLINE=", o
->cmdline
);
968 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
969 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
970 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
971 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
973 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
974 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
975 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
976 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
977 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
978 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
979 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
981 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
987 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
988 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
991 /* Note that strictly speaking storing the boot id here is
992 * redundant since the entry includes this in-line
993 * anyway. However, we need this indexed, too. */
994 if (!isempty(s
->boot_id_field
))
995 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
997 if (!isempty(s
->machine_id_field
))
998 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
1000 if (!isempty(s
->hostname_field
))
1001 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
1005 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
1006 /* Split up strictly by (non-root) UID */
1007 journal_uid
= c
->uid
;
1008 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
1009 /* Split up by login UIDs. We do this only if the
1010 * realuid is not root, in order not to accidentally
1011 * leak privileged information to the user that is
1012 * logged by a privileged process that is part of an
1013 * unprivileged session. */
1014 journal_uid
= c
->owner_uid
;
1018 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1021 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
1023 struct iovec
*iovec
;
1031 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
1032 iovec
= newa(struct iovec
, m
);
1034 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1035 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1036 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1038 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1039 assert_cc(6 == LOG_INFO
);
1040 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
1043 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
1046 va_start(ap
, format
);
1047 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
1048 /* Error handling below */
1052 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1055 free(iovec
[k
++].iov_base
);
1058 /* We failed to format the message. Emit a warning instead. */
1061 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1064 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
1065 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
1066 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1070 void server_dispatch_message(
1072 struct iovec
*iovec
, size_t n
, size_t m
,
1074 const struct timeval
*tv
,
1078 uint64_t available
= 0;
1082 assert(iovec
|| n
== 0);
1087 if (LOG_PRI(priority
) > s
->max_level_store
)
1090 /* Stop early in case the information will not be stored
1092 if (s
->storage
== STORAGE_NONE
)
1096 (void) determine_space(s
, &available
, NULL
);
1098 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, c
->log_rate_limit_interval
, c
->log_rate_limit_burst
, priority
& LOG_PRIMASK
, available
);
1102 /* Write a suppression message if we suppressed something */
1104 server_driver_message(s
, c
->pid
,
1105 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
1106 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
1107 "N_DROPPED=%i", rl
- 1,
1111 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
1114 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
1116 sd_journal
*j
= NULL
;
1117 char ts
[FORMAT_TIMESPAN_MAX
];
1124 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
1127 if (!s
->runtime_journal
)
1130 if (require_flag_file
&& !flushed_flag_is_set())
1133 (void) system_journal_open(s
, true);
1135 if (!s
->system_journal
)
1138 log_debug("Flushing to /var...");
1140 start
= now(CLOCK_MONOTONIC
);
1142 r
= sd_id128_get_machine(&machine
);
1146 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1148 return log_error_errno(r
, "Failed to read runtime journal: %m");
1150 sd_journal_set_data_threshold(j
, 0);
1152 SD_JOURNAL_FOREACH(j
) {
1156 f
= j
->current_file
;
1157 assert(f
&& f
->current_offset
> 0);
1161 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1163 log_error_errno(r
, "Can't read entry: %m");
1167 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1171 if (!shall_try_append_again(s
->system_journal
, r
)) {
1172 log_error_errno(r
, "Can't write entry: %m");
1177 server_vacuum(s
, false);
1179 if (!s
->system_journal
) {
1180 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1185 log_debug("Retrying write.");
1186 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1188 log_error_errno(r
, "Can't write entry: %m");
1196 if (s
->system_journal
)
1197 journal_file_post_change(s
->system_journal
);
1199 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1202 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1204 sd_journal_close(j
);
1206 server_driver_message(s
, 0, NULL
,
1207 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1208 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1215 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1216 Server
*s
= userdata
;
1217 struct ucred
*ucred
= NULL
;
1218 struct timeval
*tv
= NULL
;
1219 struct cmsghdr
*cmsg
;
1221 size_t label_len
= 0, m
;
1224 int *fds
= NULL
, v
= 0;
1228 struct cmsghdr cmsghdr
;
1230 /* We use NAME_MAX space for the SELinux label
1231 * here. The kernel currently enforces no
1232 * limit, but according to suggestions from
1233 * the SELinux people this will change and it
1234 * will probably be identical to NAME_MAX. For
1235 * now we use that, but this should be updated
1236 * one day when the final limit is known. */
1237 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1238 CMSG_SPACE(sizeof(struct timeval
)) +
1239 CMSG_SPACE(sizeof(int)) + /* fd */
1240 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1243 union sockaddr_union sa
= {};
1245 struct msghdr msghdr
= {
1248 .msg_control
= &control
,
1249 .msg_controllen
= sizeof(control
),
1251 .msg_namelen
= sizeof(sa
),
1255 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1257 if (revents
!= EPOLLIN
)
1258 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
1259 "Got invalid event from epoll for datagram fd: %" PRIx32
,
1262 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1264 (void) ioctl(fd
, SIOCINQ
, &v
);
1266 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1267 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1269 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1271 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1274 iovec
= IOVEC_MAKE(s
->buffer
, s
->buffer_size
- 1); /* Leave room for trailing NUL we add later */
1276 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1278 if (IN_SET(errno
, EINTR
, EAGAIN
))
1281 return log_error_errno(errno
, "recvmsg() failed: %m");
1284 CMSG_FOREACH(cmsg
, &msghdr
)
1285 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1286 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1287 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1288 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1289 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1290 cmsg
->cmsg_type
== SCM_SECURITY
) {
1291 label
= (char*) CMSG_DATA(cmsg
);
1292 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1293 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1294 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1295 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1296 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1297 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1298 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1299 fds
= (int*) CMSG_DATA(cmsg
);
1300 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1303 /* And a trailing NUL, just in case */
1306 if (fd
== s
->syslog_fd
) {
1307 if (n
> 0 && n_fds
== 0)
1308 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1310 log_warning("Got file descriptors via syslog socket. Ignoring.");
1312 } else if (fd
== s
->native_fd
) {
1313 if (n
> 0 && n_fds
== 0)
1314 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1315 else if (n
== 0 && n_fds
== 1)
1316 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1318 log_warning("Got too many file descriptors via native socket. Ignoring.");
1321 assert(fd
== s
->audit_fd
);
1323 if (n
> 0 && n_fds
== 0)
1324 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1326 log_warning("Got file descriptors via audit socket. Ignoring.");
1329 close_many(fds
, n_fds
);
1333 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1334 Server
*s
= userdata
;
1339 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1341 (void) server_flush_to_var(s
, false);
1343 server_vacuum(s
, false);
1345 r
= touch("/run/systemd/journal/flushed");
1347 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1349 server_space_usage_message(s
, NULL
);
1353 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1354 Server
*s
= userdata
;
1359 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1361 server_vacuum(s
, true);
1363 if (s
->system_journal
)
1364 patch_min_use(&s
->system_storage
);
1365 if (s
->runtime_journal
)
1366 patch_min_use(&s
->runtime_storage
);
1368 /* Let clients know when the most recent rotation happened. */
1369 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1371 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1376 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1377 Server
*s
= userdata
;
1381 log_received_signal(LOG_INFO
, si
);
1383 sd_event_exit(s
->event
, 0);
1387 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1388 Server
*s
= userdata
;
1393 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1397 /* Let clients know when the most recent sync happened. */
1398 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1400 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1405 static int setup_signals(Server
*s
) {
1410 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1412 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1416 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1420 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1424 /* Let's process SIGTERM late, so that we flush all queued
1425 * messages to disk before we exit */
1426 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1430 /* When journald is invoked on the terminal (when debugging),
1431 * it's useful if C-c is handled equivalent to SIGTERM. */
1432 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1436 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1440 /* SIGRTMIN+1 causes an immediate sync. We process this very
1441 * late, so that everything else queued at this point is
1442 * really written to disk. Clients can watch
1443 * /run/systemd/journal/synced with inotify until its mtime
1444 * changes to see when a sync happened. */
1445 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1449 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1456 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1462 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1464 r
= value
? parse_boolean(value
) : true;
1466 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1468 s
->forward_to_syslog
= r
;
1470 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1472 r
= value
? parse_boolean(value
) : true;
1474 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1476 s
->forward_to_kmsg
= r
;
1478 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1480 r
= value
? parse_boolean(value
) : true;
1482 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1484 s
->forward_to_console
= r
;
1486 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1488 r
= value
? parse_boolean(value
) : true;
1490 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1492 s
->forward_to_wall
= r
;
1494 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1496 if (proc_cmdline_value_missing(key
, value
))
1499 r
= log_level_from_string(value
);
1501 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1503 s
->max_level_console
= r
;
1505 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1507 if (proc_cmdline_value_missing(key
, value
))
1510 r
= log_level_from_string(value
);
1512 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1514 s
->max_level_store
= r
;
1516 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1518 if (proc_cmdline_value_missing(key
, value
))
1521 r
= log_level_from_string(value
);
1523 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1525 s
->max_level_syslog
= r
;
1527 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1529 if (proc_cmdline_value_missing(key
, value
))
1532 r
= log_level_from_string(value
);
1534 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1536 s
->max_level_kmsg
= r
;
1538 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1540 if (proc_cmdline_value_missing(key
, value
))
1543 r
= log_level_from_string(value
);
1545 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1547 s
->max_level_wall
= r
;
1549 } else if (startswith(key
, "systemd.journald"))
1550 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1552 /* do not warn about state here, since probably systemd already did */
1556 static int server_parse_config_file(Server
*s
) {
1559 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1560 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1562 config_item_perf_lookup
, journald_gperf_lookup
,
1563 CONFIG_PARSE_WARN
, s
);
1566 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1567 Server
*s
= userdata
;
1575 int server_schedule_sync(Server
*s
, int priority
) {
1580 if (priority
<= LOG_CRIT
) {
1581 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1586 if (s
->sync_scheduled
)
1589 if (s
->sync_interval_usec
> 0) {
1592 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1596 when
+= s
->sync_interval_usec
;
1598 if (!s
->sync_event_source
) {
1599 r
= sd_event_add_time(
1601 &s
->sync_event_source
,
1604 server_dispatch_sync
, s
);
1608 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1610 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1614 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1619 s
->sync_scheduled
= true;
1625 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1626 Server
*s
= userdata
;
1630 server_cache_hostname(s
);
1634 static int server_open_hostname(Server
*s
) {
1639 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1640 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1641 if (s
->hostname_fd
< 0)
1642 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1644 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1646 /* kernels prior to 3.2 don't support polling this file. Ignore
1649 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1650 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1654 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1657 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1659 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1664 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1665 Server
*s
= userdata
;
1669 assert(s
->notify_event_source
== es
);
1670 assert(s
->notify_fd
== fd
);
1672 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1673 * message on it. Either it's the watchdog event, the initial
1674 * READY=1 event or an stdout stream event. If there's nothing
1675 * to write anymore, turn our event source off. The next time
1676 * there's something to send it will be turned on again. */
1678 if (!s
->sent_notify_ready
) {
1679 static const char p
[] =
1681 "STATUS=Processing requests...";
1684 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1686 if (errno
== EAGAIN
)
1689 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1692 s
->sent_notify_ready
= true;
1693 log_debug("Sent READY=1 notification.");
1695 } else if (s
->send_watchdog
) {
1697 static const char p
[] =
1702 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1704 if (errno
== EAGAIN
)
1707 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1710 s
->send_watchdog
= false;
1711 log_debug("Sent WATCHDOG=1 notification.");
1713 } else if (s
->stdout_streams_notify_queue
)
1714 /* Dispatch one stream notification event */
1715 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1717 /* Leave us enabled if there's still more to do. */
1718 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1721 /* There was nothing to do anymore, let's turn ourselves off. */
1722 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1724 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1729 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1730 Server
*s
= userdata
;
1735 s
->send_watchdog
= true;
1737 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1739 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1741 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1743 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1745 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1747 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1752 static int server_connect_notify(Server
*s
) {
1753 union sockaddr_union sa
= {};
1758 assert(s
->notify_fd
< 0);
1759 assert(!s
->notify_event_source
);
1762 So here's the problem: we'd like to send notification
1763 messages to PID 1, but we cannot do that via sd_notify(),
1764 since that's synchronous, and we might end up blocking on
1765 it. Specifically: given that PID 1 might block on
1766 dbus-daemon during IPC, and dbus-daemon is logging to us,
1767 and might hence block on us, we might end up in a deadlock
1768 if we block on sending PID 1 notification messages — by
1769 generating a full blocking circle. To avoid this, let's
1770 create a non-blocking socket, and connect it to the
1771 notification socket, and then wait for POLLOUT before we
1772 send anything. This should efficiently avoid any deadlocks,
1773 as we'll never block on PID 1, hence PID 1 can safely block
1774 on dbus-daemon which can safely block on us again.
1776 Don't think that this issue is real? It is, see:
1777 https://github.com/systemd/systemd/issues/1505
1780 e
= getenv("NOTIFY_SOCKET");
1784 salen
= sockaddr_un_set_path(&sa
.un
, e
);
1786 return log_error_errno(salen
, "NOTIFY_SOCKET set to invalid value '%s': %m", e
);
1788 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1789 if (s
->notify_fd
< 0)
1790 return log_error_errno(errno
, "Failed to create notify socket: %m");
1792 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1794 r
= connect(s
->notify_fd
, &sa
.sa
, salen
);
1796 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1798 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1800 return log_error_errno(r
, "Failed to watch notification socket: %m");
1802 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1803 s
->send_watchdog
= true;
1805 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1807 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1810 /* This should fire pretty soon, which we'll use to send the
1816 int server_init(Server
*s
) {
1817 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1832 .compress
.enabled
= true,
1833 .compress
.threshold_bytes
= (uint64_t) -1,
1837 .watchdog_usec
= USEC_INFINITY
,
1839 .sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
,
1840 .sync_scheduled
= false,
1842 .rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
,
1843 .rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
,
1845 .forward_to_wall
= true,
1847 .max_file_usec
= DEFAULT_MAX_FILE_USEC
,
1849 .max_level_store
= LOG_DEBUG
,
1850 .max_level_syslog
= LOG_DEBUG
,
1851 .max_level_kmsg
= LOG_NOTICE
,
1852 .max_level_console
= LOG_INFO
,
1853 .max_level_wall
= LOG_EMERG
,
1855 .line_max
= DEFAULT_LINE_MAX
,
1857 .runtime_storage
.name
= "Runtime Journal",
1858 .system_storage
.name
= "System Journal",
1861 journal_reset_metrics(&s
->system_storage
.metrics
);
1862 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1864 server_parse_config_file(s
);
1866 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1868 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1870 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1871 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1872 s
->rate_limit_interval
, s
->rate_limit_burst
);
1873 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1876 (void) mkdir_p("/run/systemd/journal", 0755);
1878 s
->user_journals
= ordered_hashmap_new(NULL
);
1879 if (!s
->user_journals
)
1882 s
->mmap
= mmap_cache_new();
1886 s
->deferred_closes
= set_new(NULL
);
1887 if (!s
->deferred_closes
)
1890 r
= sd_event_default(&s
->event
);
1892 return log_error_errno(r
, "Failed to create event loop: %m");
1894 n
= sd_listen_fds(true);
1896 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1898 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1900 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1902 if (s
->native_fd
>= 0)
1903 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1904 "Too many native sockets passed.");
1908 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1910 if (s
->stdout_fd
>= 0)
1911 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1912 "Too many stdout sockets passed.");
1916 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1917 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1919 if (s
->syslog_fd
>= 0)
1920 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1921 "Too many /dev/log sockets passed.");
1925 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1927 if (s
->audit_fd
>= 0)
1928 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1929 "Too many audit sockets passed.");
1941 r
= fdset_put(fds
, fd
);
1947 /* Try to restore streams, but don't bother if this fails */
1948 (void) server_restore_streams(s
, fds
);
1950 if (fdset_size(fds
) > 0) {
1951 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1952 fds
= fdset_free(fds
);
1955 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1957 /* always open stdout, syslog, native, and kmsg sockets */
1959 /* systemd-journald.socket: /run/systemd/journal/stdout */
1960 r
= server_open_stdout_socket(s
);
1964 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1965 r
= server_open_syslog_socket(s
);
1969 /* systemd-journald.socket: /run/systemd/journal/socket */
1970 r
= server_open_native_socket(s
);
1975 r
= server_open_dev_kmsg(s
);
1979 /* Unless we got *some* sockets and not audit, open audit socket */
1980 if (s
->audit_fd
>= 0 || no_sockets
) {
1981 r
= server_open_audit(s
);
1986 r
= server_open_kernel_seqnum(s
);
1990 r
= server_open_hostname(s
);
1994 r
= setup_signals(s
);
1998 s
->rate_limit
= journal_rate_limit_new();
2002 r
= cg_get_root_path(&s
->cgroup_root
);
2006 server_cache_hostname(s
);
2007 server_cache_boot_id(s
);
2008 server_cache_machine_id(s
);
2010 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
2011 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
2012 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2015 (void) server_connect_notify(s
);
2017 (void) client_context_acquire_default(s
);
2019 return system_journal_open(s
, false);
2022 void server_maybe_append_tags(Server
*s
) {
2028 n
= now(CLOCK_REALTIME
);
2030 if (s
->system_journal
)
2031 journal_file_maybe_append_tag(s
->system_journal
, n
);
2033 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2034 journal_file_maybe_append_tag(f
, n
);
2038 void server_done(Server
*s
) {
2041 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
2043 while (s
->stdout_streams
)
2044 stdout_stream_free(s
->stdout_streams
);
2046 client_context_flush_all(s
);
2048 if (s
->system_journal
)
2049 (void) journal_file_close(s
->system_journal
);
2051 if (s
->runtime_journal
)
2052 (void) journal_file_close(s
->runtime_journal
);
2054 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
2056 sd_event_source_unref(s
->syslog_event_source
);
2057 sd_event_source_unref(s
->native_event_source
);
2058 sd_event_source_unref(s
->stdout_event_source
);
2059 sd_event_source_unref(s
->dev_kmsg_event_source
);
2060 sd_event_source_unref(s
->audit_event_source
);
2061 sd_event_source_unref(s
->sync_event_source
);
2062 sd_event_source_unref(s
->sigusr1_event_source
);
2063 sd_event_source_unref(s
->sigusr2_event_source
);
2064 sd_event_source_unref(s
->sigterm_event_source
);
2065 sd_event_source_unref(s
->sigint_event_source
);
2066 sd_event_source_unref(s
->sigrtmin1_event_source
);
2067 sd_event_source_unref(s
->hostname_event_source
);
2068 sd_event_source_unref(s
->notify_event_source
);
2069 sd_event_source_unref(s
->watchdog_event_source
);
2070 sd_event_unref(s
->event
);
2072 safe_close(s
->syslog_fd
);
2073 safe_close(s
->native_fd
);
2074 safe_close(s
->stdout_fd
);
2075 safe_close(s
->dev_kmsg_fd
);
2076 safe_close(s
->audit_fd
);
2077 safe_close(s
->hostname_fd
);
2078 safe_close(s
->notify_fd
);
2081 journal_rate_limit_free(s
->rate_limit
);
2083 if (s
->kernel_seqnum
)
2084 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2088 free(s
->cgroup_root
);
2089 free(s
->hostname_field
);
2090 free(s
->runtime_storage
.path
);
2091 free(s
->system_storage
.path
);
2094 mmap_cache_unref(s
->mmap
);
2097 static const char* const storage_table
[_STORAGE_MAX
] = {
2098 [STORAGE_AUTO
] = "auto",
2099 [STORAGE_VOLATILE
] = "volatile",
2100 [STORAGE_PERSISTENT
] = "persistent",
2101 [STORAGE_NONE
] = "none"
2104 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2105 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
2107 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2108 [SPLIT_LOGIN
] = "login",
2109 [SPLIT_UID
] = "uid",
2110 [SPLIT_NONE
] = "none",
2113 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2114 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
2116 int config_parse_line_max(
2118 const char *filename
,
2120 const char *section
,
2121 unsigned section_line
,
2136 if (isempty(rvalue
))
2137 /* Empty assignment means default */
2138 *sz
= DEFAULT_LINE_MAX
;
2142 r
= parse_size(rvalue
, 1024, &v
);
2144 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2149 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2150 * terminal size is 80ch, and it might make sense to break one character before the natural
2151 * line break would occur on that. */
2152 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2154 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2155 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2156 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2157 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2158 * fail much earlier anyway. */
2159 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2168 int config_parse_compress(const char* unit
,
2169 const char *filename
,
2171 const char *section
,
2172 unsigned section_line
,
2178 JournalCompressOptions
* compress
= data
;
2181 if (streq(rvalue
, "1")) {
2182 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2183 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2184 compress
->enabled
= true;
2185 } else if (streq(rvalue
, "0")) {
2186 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2187 "Compress= ambiguously specified as 0, disabling compression");
2188 compress
->enabled
= false;
2189 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2190 compress
->enabled
= r
;
2191 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2192 compress
->enabled
= true;
2193 else if (isempty(rvalue
)) {
2194 compress
->enabled
= true;
2195 compress
->threshold_bytes
= (uint64_t) -1;
2197 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);