1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <selinux/selinux.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
13 #include "sd-daemon.h"
14 #include "sd-journal.h"
15 #include "sd-messages.h"
18 #include "alloc-util.h"
19 #include "audit-util.h"
20 #include "cgroup-util.h"
21 #include "conf-parser.h"
22 #include "dirent-util.h"
23 #include "extract-word.h"
26 #include "format-util.h"
29 #include "hostname-util.h"
30 #include "id128-util.h"
32 #include "journal-authenticate.h"
33 #include "journal-file.h"
34 #include "journal-internal.h"
35 #include "journal-vacuum.h"
36 #include "journald-audit.h"
37 #include "journald-context.h"
38 #include "journald-kmsg.h"
39 #include "journald-native.h"
40 #include "journald-rate-limit.h"
41 #include "journald-server.h"
42 #include "journald-stream.h"
43 #include "journald-syslog.h"
47 #include "parse-util.h"
48 #include "proc-cmdline.h"
49 #include "process-util.h"
51 #include "selinux-util.h"
52 #include "signal-util.h"
53 #include "socket-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "syslog-util.h"
58 #include "user-util.h"
60 #define USER_JOURNALS_MAX 1024
62 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
63 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
64 #define DEFAULT_RATE_LIMIT_BURST 10000
65 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
67 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
69 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
71 /* The period to insert between posting changes for coalescing */
72 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
74 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
75 * for a bit of additional metadata. */
76 #define DEFAULT_LINE_MAX (48*1024)
/* Measures disk usage of the journal directory 'path'.
 *
 * *ret_free receives f_bsize * f_bavail as reported by fstatvfs() on the
 * opened directory. *ret_used is increased by st_blocks * 512 for every
 * directory entry whose name ends in ".journal" or ".journal~" and which
 * fstatat() (with AT_SYMLINK_NOFOLLOW) reports as a regular file.
 *
 * Failure to open the directory is logged at LOG_DEBUG when errno is ENOENT
 * and at LOG_ERR otherwise; an fstatvfs() failure is logged as an error. Both
 * return the value produced by the logging call. Entries that cannot be
 * stat'ed are only logged at debug level ("ignoring").
 *
 * NOTE(review): *ret_used is updated with "+="; its initialisation is not
 * visible in this excerpt -- confirm it is zeroed before the loop. */
78 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
79 _cleanup_closedir_
DIR *d
= NULL
;
88 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
89 errno
, "Failed to open %s: %m", path
);
91 if (fstatvfs(dirfd(d
), &ss
) < 0)
92 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
94 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
96 FOREACH_DIRENT_ALL(de
, d
, break) {
99 if (!endswith(de
->d_name
, ".journal") &&
100 !endswith(de
->d_name
, ".journal~"))
103 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
104 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
108 if (!S_ISREG(st
.st_mode
))
111 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
117 static void cache_space_invalidate(JournalStorageSpace
*space
) {
/* Recomputes the cached space figures in storage->space from the current
 * state of storage->path.
 *
 * The cache is timestamped with CLOCK_MONOTONIC; the check below compares
 * that timestamp plus RECHECK_SPACE_USEC against the current time
 * (presumably to skip the refresh while the cache is still fresh -- the
 * branch body is not visible in this excerpt).
 *
 * After determine_path_usage() the following values are stored:
 *   vfs_used       bytes used by journal files, as measured
 *   vfs_available  free bytes on the file system, as measured
 *   limit          MIN(MAX(vfs_used + LESS_BY(vfs_avail, keep_free), min_use), max_use)
 *   available      LESS_BY(limit, vfs_used)
 *   timestamp      the time sampled at the start of this call */
121 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
122 JournalStorageSpace
*space
;
123 JournalMetrics
*metrics
;
124 uint64_t vfs_used
, vfs_avail
, avail
;
130 metrics
= &storage
->metrics
;
131 space
= &storage
->space
;
133 ts
= now(CLOCK_MONOTONIC
);
135 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
138 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
142 space
->vfs_used
= vfs_used
;
143 space
->vfs_available
= vfs_avail
;
145 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
147 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
148 space
->available
= LESS_BY(space
->limit
, vfs_used
);
149 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to storage->space.vfs_used when the latter
 * is larger; min_use is never lowered here. The value read is the cached
 * vfs_used, so callers in this file refresh the space cache first. */
153 static void patch_min_use(JournalStorage
*storage
) {
156 /* Let's bump the min_use limit to the current usage on disk. We do
157 * this when starting up and first opening the journal files. This way
158 * sudden spikes in disk usage will not cause journald to vacuum files
159 * without bounds. Note that this means that only a restart of journald
160 * will make it reset this value. */
162 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Reports the cached 'available' and 'limit' figures of the storage that is
 * currently in use: system_storage when s->system_journal is set, otherwise
 * runtime_storage. The cache is refreshed via cache_space_refresh() first.
 *
 * NOTE(review): a caller in this file passes NULL for 'limit'; the guards
 * around the two stores below are not visible in this excerpt -- confirm that
 * NULL output pointers are tolerated. */
165 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
171 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
173 r
= cache_space_refresh(s
, js
);
176 *available
= js
->space
.available
;
178 *limit
= js
->space
.limit
;
183 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
184 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
185 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
186 JournalMetrics
*metrics
;
191 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
193 if (cache_space_refresh(s
, storage
) < 0)
196 metrics
= &storage
->metrics
;
197 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
198 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
199 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
200 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
201 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
202 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
204 server_driver_message(s
, 0,
205 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
206 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
207 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
208 "JOURNAL_NAME=%s", storage
->name
,
209 "JOURNAL_PATH=%s", storage
->path
,
210 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
211 "CURRENT_USE_PRETTY=%s", fb1
,
212 "MAX_USE=%"PRIu64
, metrics
->max_use
,
213 "MAX_USE_PRETTY=%s", fb2
,
214 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
215 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
216 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
217 "DISK_AVAILABLE_PRETTY=%s", fb4
,
218 "LIMIT=%"PRIu64
, storage
->space
.limit
,
219 "LIMIT_PRETTY=%s", fb5
,
220 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
221 "AVAILABLE_PRETTY=%s", fb6
,
/* Decides whether log data for 'uid' belongs in the system journal rather
 * than in a per-user journal: true for system UIDs, dynamic UIDs and
 * UID_NOBODY, false for everything else. */
225 static bool uid_for_system_journal(uid_t uid
) {
227 /* Returns true if the specified UID shall get its data stored in the system journal*/
229 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
/* Applies a per-user ACL to the open journal file 'f' by calling
 * add_acls_for_user() on f->fd. A failure is only logged as a warning
 * ("ignoring"); nothing is reported to the caller.
 *
 * UIDs for which uid_for_system_journal() is true are tested first
 * (presumably to skip the ACL for them -- the branch body is not visible in
 * this excerpt). */
232 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
239 if (uid_for_system_journal(uid
))
242 r
= add_acls_for_user(f
->fd
, uid
);
244 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
248 static int open_journal(
254 JournalMetrics
*metrics
,
264 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
265 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
267 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
268 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
273 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
275 (void) journal_file_close(f
);
/* Returns true when the flag file /run/systemd/journal/flushed exists,
 * i.e. when access(..., F_OK) succeeds. */
283 static bool flushed_flag_is_set(void) {
284 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
287 static int system_journal_open(Server
*s
, bool flush_requested
) {
291 if (!s
->system_journal
&&
292 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
293 (flush_requested
|| flushed_flag_is_set())) {
295 /* If in auto mode: first try to create the machine
296 * path, but not the prefix.
298 * If in persistent mode: create /var/log/journal and
299 * the machine path */
301 if (s
->storage
== STORAGE_PERSISTENT
)
302 (void) mkdir_p("/var/log/journal/", 0755);
304 (void) mkdir(s
->system_storage
.path
, 0755);
306 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
307 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
309 server_add_acls(s
->system_journal
, 0);
310 (void) cache_space_refresh(s
, &s
->system_storage
);
311 patch_min_use(&s
->system_storage
);
313 if (!IN_SET(r
, -ENOENT
, -EROFS
))
314 log_warning_errno(r
, "Failed to open system journal: %m");
319 /* If the runtime journal is open, and we're post-flush, we're
320 * recovering from a failed system journal rotate (ENOSPC)
321 * for which the runtime journal was reopened.
323 * Perform an implicit flush to var, leaving the runtime
324 * journal closed, now that the system journal is back.
326 if (!flush_requested
)
327 (void) server_flush_to_var(s
, true);
330 if (!s
->runtime_journal
&&
331 (s
->storage
!= STORAGE_NONE
)) {
333 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
335 if (s
->system_journal
) {
337 /* Try to open the runtime journal, but only
338 * if it already exists, so that we can flush
339 * it into the system journal */
341 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
344 log_warning_errno(r
, "Failed to open runtime journal: %m");
351 /* OK, we really need the runtime journal, so create
352 * it if necessary. */
354 (void) mkdir("/run/log", 0755);
355 (void) mkdir("/run/log/journal", 0755);
356 (void) mkdir_parents(fn
, 0750);
358 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
360 return log_error_errno(r
, "Failed to open runtime journal: %m");
363 if (s
->runtime_journal
) {
364 server_add_acls(s
->runtime_journal
, 0);
365 (void) cache_space_refresh(s
, &s
->runtime_storage
);
366 patch_min_use(&s
->runtime_storage
);
/* Selects the journal file that an entry logged for 'uid' is written to.
 *
 * Order of decisions visible below:
 *   1. system_journal_open(s, false) is called first so that journals left
 *      NULL by an earlier failure get another chance to open.
 *   2. If the runtime journal is open it is returned for every UID.
 *   3. UIDs accepted by uid_for_system_journal() get s->system_journal.
 *   4. Otherwise s->user_journals is consulted; for a file not yet open the
 *      path /var/log/journal/<machine-id>/user-<uid>.journal is built, entries
 *      are taken out of the map and closed while it holds USER_JOURNALS_MAX or
 *      more, and the file is opened, given ACLs and stored in the map.
 *
 * Several paths below fall back to returning s->system_journal; the
 * conditions guarding some of them are not visible in this excerpt. */
373 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
374 _cleanup_free_
char *p
= NULL
;
381 /* A rotate that fails to create the new journal (ENOSPC) leaves the
382 * rotated journal as NULL. Unless we revisit opening, even after
383 * space is made available we'll continue to return NULL indefinitely.
385 * system_journal_open() is a noop if the journals are already open, so
386 * we can just call it here to recover from failed rotates (or anything
387 * else that's left the journals as NULL).
389 * Fixes https://github.com/systemd/systemd/issues/3968 */
390 (void) system_journal_open(s
, false);
392 /* We split up user logs only on /var, not on /run. If the
393 * runtime file is open, we write to it exclusively, in order
394 * to guarantee proper order as soon as we flush /run to
395 * /var and close the runtime file. */
397 if (s
->runtime_journal
)
398 return s
->runtime_journal
;
400 if (uid_for_system_journal(uid
))
401 return s
->system_journal
;
403 r
= sd_id128_get_machine(&machine
);
405 return s
->system_journal
;
407 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
411 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
412 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
413 return s
->system_journal
;
415 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
416 /* Too many open? Then let's close one */
417 f
= ordered_hashmap_steal_first(s
->user_journals
);
419 (void) journal_file_close(f
);
422 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
424 return s
->system_journal
;
426 server_add_acls(f
, uid
);
428 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
430 (void) journal_file_close(f
);
431 return s
->system_journal
;
437 static int do_rotate(
450 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
453 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
455 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
458 server_add_acls(*f
, uid
);
/* Rotates every journal the server has open.
 *
 * The runtime journal is rotated without sealing, the system journal with
 * s->seal, both with UID 0. Each entry of s->user_journals is rotated with
 * s->seal and the UID decoded from its hashmap key; the map entry is then
 * either replaced with the new file or removed (the condition choosing
 * between the two is not visible in this excerpt).
 *
 * Finally, every file in s->deferred_closes that is no longer offlining is
 * removed from the set and closed. Results of the runtime/system rotations
 * are deliberately discarded. */
463 void server_rotate(Server
*s
) {
469 log_debug("Rotating...");
471 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
472 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
474 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
475 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
477 ordered_hashmap_replace(s
->user_journals
, k
, f
);
479 /* Old file has been closed and deallocated */
480 ordered_hashmap_remove(s
->user_journals
, k
);
483 /* Perform any deferred closes which aren't still offlining. */
484 SET_FOREACH(f
, s
->deferred_closes
, i
)
485 if (!journal_file_is_offlining(f
)) {
486 (void) set_remove(s
->deferred_closes
, f
);
487 (void) journal_file_close(f
);
/* Syncs the persistent journals by setting them offline.
 *
 * journal_file_set_offline(..., false) is called on the system journal (when
 * open) and on every file in s->user_journals; failures are logged as
 * warnings and otherwise ignored. If a sync timer source exists it is
 * switched to SD_EVENT_OFF, and s->sync_scheduled is cleared so that a new
 * sync can be scheduled later. */
491 void server_sync(Server
*s
) {
496 if (s
->system_journal
) {
497 r
= journal_file_set_offline(s
->system_journal
, false);
499 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
502 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
503 r
= journal_file_set_offline(f
, false);
505 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
508 if (s
->sync_event_source
) {
509 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
511 log_error_errno(r
, "Failed to disable sync timer source: %m");
514 s
->sync_scheduled
= false;
/* Vacuums the journal directory of one storage.
 *
 * The space cache is refreshed first so that storage->space.limit is
 * current, then journal_directory_vacuum() is run with that limit, the
 * configured n_max_files and s->max_retention_usec; it also updates
 * s->oldest_file_usec. Errors other than -ENOENT are logged as warnings and
 * ignored. The space cache is invalidated afterwards because disk usage may
 * have changed.
 *
 * A usage message is emitted via server_space_usage_message() (presumably
 * only when 'verbose' is set -- the guard is not visible in this excerpt). */
517 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
524 (void) cache_space_refresh(s
, storage
);
527 server_space_usage_message(s
, storage
);
529 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
530 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
531 &s
->oldest_file_usec
, verbose
);
532 if (r
< 0 && r
!= -ENOENT
)
533 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
535 cache_space_invalidate(&storage
->space
);
/* Vacuums the storages whose journal is currently open.
 *
 * s->oldest_file_usec is reset to 0 before vacuuming; do_vacuum() is then
 * run on system_storage when the system journal is open and on
 * runtime_storage when the runtime journal is open. 'verbose' is passed
 * through unchanged. */
538 int server_vacuum(Server
*s
, bool verbose
) {
541 log_debug("Vacuuming...");
543 s
->oldest_file_usec
= 0;
545 if (s
->system_journal
)
546 do_vacuum(s
, &s
->system_storage
, verbose
);
547 if (s
->runtime_journal
)
548 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Fills s->machine_id_field with "_MACHINE_ID=" followed by the string form
 * of the ID obtained from sd_id128_get_machine(). How a failure of that call
 * is handled is not visible in this excerpt. */
553 static void server_cache_machine_id(Server
*s
) {
559 r
= sd_id128_get_machine(&id
);
563 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Fills s->boot_id_field with "_BOOT_ID=" followed by the string form of the
 * ID obtained from sd_id128_get_boot(). How a failure of that call is
 * handled is not visible in this excerpt. */
566 static void server_cache_boot_id(Server
*s
) {
572 r
= sd_id128_get_boot(&id
);
576 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Rebuilds s->hostname_field as "_HOSTNAME=" followed by the current host
 * name. The temporary host name string is released automatically
 * (_cleanup_free_); the previously cached field is freed before the new
 * string takes its place. Handling of allocation failures is not visible in
 * this excerpt. */
579 static void server_cache_hostname(Server
*s
) {
580 _cleanup_free_
char *t
= NULL
;
585 t
= gethostname_malloc();
589 x
= strappend("_HOSTNAME=", t
);
593 free(s
->hostname_field
);
594 s
->hostname_field
= x
;
/* Classifies the error code 'r' from a failed write to journal file 'f' and
 * logs why the file is going to be rotated. The cases cover allocation
 * limits (E2BIG, EFBIG, EDQUOT, ENOSPC), I/O errors, files from another
 * machine, unclean shutdown, unsupported features, corruption or truncation,
 * deleted files and files with timestamps from the future.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers in this file treat a false result as "give up on this entry" --
 * confirm that every case listed here returns true. */
597 static bool shall_try_append_again(JournalFile
*f
, int r
) {
600 case -E2BIG
: /* Hit configured limit */
601 case -EFBIG
: /* Hit fs limit */
602 case -EDQUOT
: /* Quota limit hit */
603 case -ENOSPC
: /* Disk full */
604 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
607 case -EIO
: /* I/O error of some kind (mmap) */
608 log_warning("%s: IO error, rotating.", f
->path
);
611 case -EHOSTDOWN
: /* Other machine */
612 log_info("%s: Journal file from other machine, rotating.", f
->path
);
615 case -EBUSY
: /* Unclean shutdown */
616 log_info("%s: Unclean shutdown, rotating.", f
->path
);
619 case -EPROTONOSUPPORT
: /* Unsupported feature */
620 log_info("%s: Unsupported feature, rotating.", f
->path
);
623 case -EBADMSG
: /* Corrupted */
624 case -ENODATA
: /* Truncated */
625 case -ESHUTDOWN
: /* Already archived */
626 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
629 case -EIDRM
: /* Journal file has been deleted */
630 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
633 case -ETXTBSY
: /* Journal file is from the future */
634 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
/* Appends one entry, given as 'n' iovec items, to the journal selected for
 * 'uid' by find_journal().
 *
 * Both timestamps of the entry come from the event loop (sd_event_now()),
 * not from the sender. A realtime clock that moved backwards relative to
 * s->last_realtime_clock, or a file for which journal_file_rotate_suggested()
 * is true, is logged as a reason to rotate; the journal is looked up again
 * after vacuuming.
 *
 * After a successful append a sync is scheduled according to 'priority'.
 * When the append fails, the entry is dropped with an error message if a
 * vacuum already happened or shall_try_append_again() says the error is not
 * recoverable; otherwise the server vacuums, looks the journal up again and
 * retries the append once. Failures are logged only -- this function returns
 * nothing. */
642 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
643 bool vacuumed
= false, rotate
= false;
644 struct dual_timestamp ts
;
652 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
653 * the source time, and not even the time the event was originally seen, but instead simply the time we started
654 * processing it, as we want strictly linear ordering in what we write out.) */
655 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
656 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
658 if (ts
.realtime
< s
->last_realtime_clock
) {
659 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
660 * regular operation. However, when it does happen, then we should make sure that we start fresh files
661 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
662 * bisection works correctly. */
664 log_debug("Time jumped backwards, rotating.");
668 f
= find_journal(s
, uid
);
672 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
673 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
680 server_vacuum(s
, false);
683 f
= find_journal(s
, uid
);
688 s
->last_realtime_clock
= ts
.realtime
;
690 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
692 server_schedule_sync(s
, priority
);
696 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
697 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
702 server_vacuum(s
, false);
704 f
= find_journal(s
, uid
);
708 log_debug("Retrying write.");
709 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
711 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
713 server_schedule_sync(s
, priority
);
716 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
717 if (isset(value)) { \
719 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
720 sprintf(k, field "=" format, value); \
721 iovec[n++] = IOVEC_MAKE_STRING(k); \
724 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
725 if (!isempty(value)) { \
727 k = strjoina(field "=", value); \
728 iovec[n++] = IOVEC_MAKE_STRING(k); \
731 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
732 if (!sd_id128_is_null(value)) { \
734 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
735 sd_id128_to_string(value, stpcpy(k, field "=")); \
736 iovec[n++] = IOVEC_MAKE_STRING(k); \
739 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
740 if (value_size > 0) { \
742 k = newa(char, STRLEN(field "=") + value_size + 1); \
743 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
744 iovec[n++] = IOVEC_MAKE_STRING(k); \
747 static void dispatch_message_real(
749 struct iovec
*iovec
, size_t n
, size_t m
,
750 const ClientContext
*c
,
751 const struct timeval
*tv
,
755 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
763 N_IOVEC_META_FIELDS
+
764 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
765 client_context_extra_fields_n_iovec(c
) <= m
);
768 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
769 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
770 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
772 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
773 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
774 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
775 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
777 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
779 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
780 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
782 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
783 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
784 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
785 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
786 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
787 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
790 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
792 if (c
->extra_fields_n_iovec
> 0) {
793 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
794 n
+= c
->extra_fields_n_iovec
;
800 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
802 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
803 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
804 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
806 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
807 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
808 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
809 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
811 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
813 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
814 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
816 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
817 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
818 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
819 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
820 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
821 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
822 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
/* IOVEC_ADD_ID128_FIELD() appends the "=" separator itself (it formats
 * field "=" followed by the ID string), so the field name has to be passed
 * without a trailing "=", exactly as done for "_SYSTEMD_INVOCATION_ID"
 * earlier in this function. With the extra "=" the entry was written as
 * "OBJECT_SYSTEMD_INVOCATION_ID==<id>". */
824 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID");
830 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
831 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
834 /* Note that strictly speaking storing the boot id here is
835 * redundant since the entry includes this in-line
836 * anyway. However, we need this indexed, too. */
837 if (!isempty(s
->boot_id_field
))
838 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
840 if (!isempty(s
->machine_id_field
))
841 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
843 if (!isempty(s
->hostname_field
))
844 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
848 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
849 /* Split up strictly by (non-root) UID */
850 journal_uid
= c
->uid
;
851 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
852 /* Split up by login UIDs. We do this only if the
853 * realuid is not root, in order not to accidentally
854 * leak privileged information to the user that is
855 * logged by a privileged process that is part of an
856 * unprivileged session. */
857 journal_uid
= c
->owner_uid
;
861 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
864 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
874 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
875 iovec
= newa(struct iovec
, m
);
877 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
878 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
879 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
881 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
882 assert_cc(6 == LOG_INFO
);
883 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
886 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
889 va_start(ap
, format
);
890 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
891 /* Error handling below */
895 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
898 free(iovec
[k
++].iov_base
);
901 /* We failed to format the message. Emit a warning instead. */
904 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
907 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
908 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
909 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
913 void server_dispatch_message(
915 struct iovec
*iovec
, size_t n
, size_t m
,
917 const struct timeval
*tv
,
921 uint64_t available
= 0;
925 assert(iovec
|| n
== 0);
930 if (LOG_PRI(priority
) > s
->max_level_store
)
933 /* Stop early in case the information will not be stored
935 if (s
->storage
== STORAGE_NONE
)
939 (void) determine_space(s
, &available
, NULL
);
941 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
945 /* Write a suppression message if we suppressed something */
947 server_driver_message(s
, c
->pid
,
948 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
949 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
950 "N_DROPPED=%i", rl
- 1,
954 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
/* Copies the entries of the runtime journal into the system journal.
 *
 * Preconditions tested below (the outcome of each test is not visible in
 * this excerpt): storage mode is STORAGE_AUTO or STORAGE_PERSISTENT, a
 * runtime journal is open, and -- when 'require_flag_file' is set -- the
 * "flushed" flag file exists. system_journal_open(s, true) is then asked to
 * open the system journal, and the function checks that it is open.
 *
 * The runtime journal is read through sd_journal (SD_JOURNAL_RUNTIME_ONLY,
 * data threshold 0). Each entry is located in its file and copied with
 * journal_file_copy_entry(). When a copy fails and shall_try_append_again()
 * allows it, the server vacuums, verifies that the system journal is still
 * open and retries the copy once.
 *
 * Afterwards the system journal is notified of the change, the runtime
 * journal is closed (s->runtime_journal takes journal_file_close()'s return
 * value), /run/log/journal is removed, the reader is closed and a driver
 * message with the elapsed time is emitted. The conditions under which these
 * final steps run are not all visible in this excerpt. */
957 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
959 sd_journal
*j
= NULL
;
960 char ts
[FORMAT_TIMESPAN_MAX
];
967 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
970 if (!s
->runtime_journal
)
973 if (require_flag_file
&& !flushed_flag_is_set())
976 (void) system_journal_open(s
, true);
978 if (!s
->system_journal
)
981 log_debug("Flushing to /var...");
983 start
= now(CLOCK_MONOTONIC
);
985 r
= sd_id128_get_machine(&machine
);
989 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
991 return log_error_errno(r
, "Failed to read runtime journal: %m");
993 sd_journal_set_data_threshold(j
, 0);
995 SD_JOURNAL_FOREACH(j
) {
1000 assert(f
&& f
->current_offset
> 0);
1004 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1006 log_error_errno(r
, "Can't read entry: %m");
1010 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1014 if (!shall_try_append_again(s
->system_journal
, r
)) {
1015 log_error_errno(r
, "Can't write entry: %m");
1020 server_vacuum(s
, false);
1022 if (!s
->system_journal
) {
1023 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1028 log_debug("Retrying write.");
1029 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1031 log_error_errno(r
, "Can't write entry: %m");
1039 journal_file_post_change(s
->system_journal
);
1041 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1044 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1046 sd_journal_close(j
);
1048 server_driver_message(s
, 0, NULL
,
1049 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1050 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1057 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1058 Server
*s
= userdata
;
1059 struct ucred
*ucred
= NULL
;
1060 struct timeval
*tv
= NULL
;
1061 struct cmsghdr
*cmsg
;
1063 size_t label_len
= 0, m
;
1066 int *fds
= NULL
, v
= 0;
1070 struct cmsghdr cmsghdr
;
1072 /* We use NAME_MAX space for the SELinux label
1073 * here. The kernel currently enforces no
1074 * limit, but according to suggestions from
1075 * the SELinux people this will change and it
1076 * will probably be identical to NAME_MAX. For
1077 * now we use that, but this should be updated
1078 * one day when the final limit is known. */
1079 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1080 CMSG_SPACE(sizeof(struct timeval
)) +
1081 CMSG_SPACE(sizeof(int)) + /* fd */
1082 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1085 union sockaddr_union sa
= {};
1087 struct msghdr msghdr
= {
1090 .msg_control
= &control
,
1091 .msg_controllen
= sizeof(control
),
1093 .msg_namelen
= sizeof(sa
),
1097 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1099 if (revents
!= EPOLLIN
) {
1100 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1104 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1106 (void) ioctl(fd
, SIOCINQ
, &v
);
1108 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1109 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1111 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1113 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1116 iovec
.iov_base
= s
->buffer
;
1117 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1119 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1121 if (IN_SET(errno
, EINTR
, EAGAIN
))
1124 return log_error_errno(errno
, "recvmsg() failed: %m");
1127 CMSG_FOREACH(cmsg
, &msghdr
) {
1129 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1130 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1131 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1132 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1133 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1134 cmsg
->cmsg_type
== SCM_SECURITY
) {
1135 label
= (char*) CMSG_DATA(cmsg
);
1136 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1137 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1138 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1139 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1140 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1141 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1142 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1143 fds
= (int*) CMSG_DATA(cmsg
);
1144 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1148 /* And a trailing NUL, just in case */
1151 if (fd
== s
->syslog_fd
) {
1152 if (n
> 0 && n_fds
== 0)
1153 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1155 log_warning("Got file descriptors via syslog socket. Ignoring.");
1157 } else if (fd
== s
->native_fd
) {
1158 if (n
> 0 && n_fds
== 0)
1159 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1160 else if (n
== 0 && n_fds
== 1)
1161 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1163 log_warning("Got too many file descriptors via native socket. Ignoring.");
1166 assert(fd
== s
->audit_fd
);
1168 if (n
> 0 && n_fds
== 0)
1169 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1171 log_warning("Got file descriptors via audit socket. Ignoring.");
1174 close_many(fds
, n_fds
);
/* SIGUSR1 handler: flush the runtime journal to persistent storage.
 *
 * 'userdata' is the Server. The sender's PID is logged, then the runtime
 * journal is flushed without requiring the flag file, the journals are
 * vacuumed, the flag file /run/systemd/journal/flushed is touched (a failure
 * is logged as a warning and ignored), and a space usage message is emitted;
 * NULL is passed as the storage for that message. */
1178 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1179 Server
*s
= userdata
;
1184 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1186 (void) server_flush_to_var(s
, false);
1188 server_vacuum(s
, false);
1190 r
= touch("/run/systemd/journal/flushed");
1192 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1194 server_space_usage_message(s
, NULL
);
/* SIGUSR2 handler: journal rotation request.
 *
 * 'userdata' is the Server. The sender's PID is logged, the journals are
 * vacuumed verbosely, min_use is patched for each storage whose journal is
 * open, and the current CLOCK_MONOTONIC time is written atomically to
 * /run/systemd/journal/rotated so clients can see when the last rotation
 * happened (a write failure is logged as a warning and ignored).
 *
 * NOTE(review): the rotation call itself is not visible in this excerpt. */
1198 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1199 Server
*s
= userdata
;
1204 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1206 server_vacuum(s
, true);
1208 if (s
->system_journal
)
1209 patch_min_use(&s
->system_storage
);
1210 if (s
->runtime_journal
)
1211 patch_min_use(&s
->runtime_storage
);
1213 /* Let clients know when the most recent rotation happened. */
1214 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1216 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Termination handler (registered for both SIGTERM and SIGINT in
 * setup_signals()): logs the received signal at LOG_INFO and asks the event
 * loop of the Server passed in 'userdata' to exit with code 0. */
1221 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1222 Server
*s
= userdata
;
1226 log_received_signal(LOG_INFO
, si
);
1228 sd_event_exit(s
->event
, 0);
/* Signal callback registered for SIGRTMIN+1 by setup_signals(); userdata is the Server.
 *
 * Logs the requesting PID at debug level and writes the current CLOCK_MONOTONIC time to
 * /run/systemd/journal/synced; a failure to write the file is only reported via
 * log_warning_errno() ("ignoring").
 *
 * NOTE(review): lines 1239-1241 and 1244 of the original are missing from this listing, so the
 * call that performs the sync and the guard around the warning are not visible — confirm
 * against the full file. */
1232 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1233 Server
*s
= userdata
;
1238 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1242 /* Let clients know when the most recent sync happened. */
1243 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1245 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs the signal handling of the journal server on s->event.
 *
 * First calls sigprocmask_many(SIG_SETMASK, ...) for SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and
 * SIGRTMIN+1 (asserted to succeed), then adds one sd-event signal source per signal:
 *
 *   SIGUSR1     -> dispatch_sigusr1    (s->sigusr1_event_source)
 *   SIGUSR2     -> dispatch_sigusr2    (s->sigusr2_event_source)
 *   SIGTERM     -> dispatch_sigterm    (s->sigterm_event_source,   priority NORMAL+20)
 *   SIGINT      -> dispatch_sigterm    (s->sigint_event_source,    priority NORMAL+20)
 *   SIGRTMIN+1  -> dispatch_sigrtmin1  (s->sigrtmin1_event_source, priority NORMAL+15)
 *
 * Every handler receives the Server as userdata.
 *
 * NOTE(review): the checks of r after each call and the final return are not visible in this
 * listing (the embedded line numbers skip) — confirm the error handling against the full file. */
1250 static int setup_signals(Server
*s
) {
1255 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1257 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1261 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1265 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1269 /* Let's process SIGTERM late, so that we flush all queued
1270 * messages to disk before we exit */
1271 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1275 /* When journald is invoked on the terminal (when debugging),
1276 * it's useful if C-c is handled equivalent to SIGTERM. */
1277 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1281 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1285 /* SIGRTMIN+1 causes an immediate sync. We process this very
1286 * late, so that everything else queued at this point is
1287 * really written to disk. Clients can watch
1288 * /run/systemd/journal/synced with inotify until its mtime
1289 * changes to see when a sync happened. */
1290 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1294 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init(); applies the
 * "systemd.journald.*" kernel command line switches to the Server.
 *
 * Recognized keys and the Server field each one sets:
 *
 *   systemd.journald.forward_to_syslog   -> s->forward_to_syslog   (boolean)
 *   systemd.journald.forward_to_kmsg     -> s->forward_to_kmsg     (boolean)
 *   systemd.journald.forward_to_console  -> s->forward_to_console  (boolean)
 *   systemd.journald.forward_to_wall     -> s->forward_to_wall     (boolean)
 *   systemd.journald.max_level_console   -> s->max_level_console   (log level)
 *   systemd.journald.max_level_store     -> s->max_level_store     (log level)
 *   systemd.journald.max_level_syslog    -> s->max_level_syslog    (log level)
 *   systemd.journald.max_level_kmsg      -> s->max_level_kmsg      (log level)
 *   systemd.journald.max_level_wall      -> s->max_level_wall      (log level)
 *
 * The boolean switches are parsed with parse_boolean() and evaluate to true when the key is
 * given without a value. The level switches first go through proc_cmdline_value_missing() and
 * are then parsed with log_level_from_string(). Any other key that starts with
 * "systemd.journald" triggers an "Unknown journald kernel command line option" warning.
 *
 * NOTE(review): the declaration of s (presumably derived from `data`), the conditions that
 * decide between the "Failed to parse ..." warning and the assignment, and the return
 * statements are not visible in this listing — confirm against the full file. */
1301 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1307 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1309 r
= value
? parse_boolean(value
) : true;
1311 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1313 s
->forward_to_syslog
= r
;
1315 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1317 r
= value
? parse_boolean(value
) : true;
1319 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1321 s
->forward_to_kmsg
= r
;
1323 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1325 r
= value
? parse_boolean(value
) : true;
1327 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1329 s
->forward_to_console
= r
;
1331 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1333 r
= value
? parse_boolean(value
) : true;
1335 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1337 s
->forward_to_wall
= r
;
1339 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1341 if (proc_cmdline_value_missing(key
, value
))
1344 r
= log_level_from_string(value
);
1346 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1348 s
->max_level_console
= r
;
1350 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1352 if (proc_cmdline_value_missing(key
, value
))
1355 r
= log_level_from_string(value
);
1357 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1359 s
->max_level_store
= r
;
1361 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1363 if (proc_cmdline_value_missing(key
, value
))
1366 r
= log_level_from_string(value
);
1368 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1370 s
->max_level_syslog
= r
;
1372 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1374 if (proc_cmdline_value_missing(key
, value
))
1377 r
= log_level_from_string(value
);
1379 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1381 s
->max_level_kmsg
= r
;
1383 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1385 if (proc_cmdline_value_missing(key
, value
))
1388 r
= log_level_from_string(value
);
1390 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1392 s
->max_level_wall
= r
;
1394 } else if (startswith(key
, "systemd.journald"))
1395 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1397 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server and returns the result of
 * config_parse_many_nulstr().
 *
 * The main file is PKGSYSCONFDIR "/journald.conf", with drop-ins taken from
 * CONF_PATHS_NULSTR("systemd/journald.conf.d"). Items are resolved through
 * config_item_perf_lookup with the journald_gperf_lookup table, the CONFIG_PARSE_WARN flag is
 * passed, and s is handed through as userdata.
 *
 * NOTE(review): original lines 1402-1403 and 1406 are not part of this listing, so at least one
 * argument of the call is not visible here. */
1401 static int server_parse_config_file(Server
*s
) {
1404 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1405 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1407 config_item_perf_lookup
, journald_gperf_lookup
,
1408 CONFIG_PARSE_WARN
, s
);
/* Timer callback installed by server_schedule_sync() on s->sync_event_source; userdata is the
 * Server.
 *
 * NOTE(review): everything after the Server cast (original lines 1413-1418) is missing from this
 * listing, so what the callback actually does cannot be documented from here — see the full
 * file. */
1411 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1412 Server
*s
= userdata
;
/* Requests that the journal be synced to disk for a message of the given syslog priority.
 *
 * Visible behaviour:
 *  - priority <= LOG_CRIT (CRIT, ALERT, EMERG) takes an "immediately sync" path;
 *  - s->sync_scheduled is tested before a new timer is armed;
 *  - if s->sync_interval_usec > 0, the current CLOCK_MONOTONIC time of the event loop plus
 *    sync_interval_usec is used as the deadline. The timer source s->sync_event_source is created
 *    on first use (callback server_dispatch_sync, userdata s, priority
 *    SD_EVENT_PRIORITY_IMPORTANT); otherwise its time is updated. The source is then enabled as
 *    SD_EVENT_ONESHOT and s->sync_scheduled is set to true.
 *
 * NOTE(review): the statements executed in the immediate-sync branch, the action taken when a
 * sync is already scheduled, the error checks on r, the else-branch structure and the return
 * values are not visible in this listing — confirm against the full file. */
1420 int server_schedule_sync(Server
*s
, int priority
) {
1425 if (priority
<= LOG_CRIT
) {
1426 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1431 if (s
->sync_scheduled
)
1434 if (s
->sync_interval_usec
> 0) {
1437 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1441 when
+= s
->sync_interval_usec
;
1443 if (!s
->sync_event_source
) {
1444 r
= sd_event_add_time(
1446 &s
->sync_event_source
,
1449 server_dispatch_sync
, s
);
1453 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1455 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1459 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1464 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the /proc/sys/kernel/hostname fd;
 * userdata is the Server. Refreshes the cached hostname via server_cache_hostname().
 *
 * NOTE(review): the return statement is not visible in this listing. */
1470 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1471 Server
*s
= userdata
;
1475 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY) into s->hostname_fd
 * and registers it with the event loop so that dispatch_hostname_change() runs when it fires.
 *
 * - If open() fails, the errno is logged and returned through log_error_errno().
 * - The fd is added with an event mask of 0 and s as userdata.
 * - One registration failure is tolerated: a warning is logged ("ignoring") and the fd is closed
 *   again with safe_close(); the in-code comment ties this to kernels prior to 3.2, which do not
 *   support polling this file. Other registration failures are logged and returned as errors.
 * - On success the source priority is set to SD_EVENT_PRIORITY_IMPORTANT-10; a failure there is
 *   logged and returned as well.
 *
 * NOTE(review): the conditions selecting between the tolerated and the fatal registration
 * failure, and the remainder of the kernel-version comment, are not visible in this listing —
 * confirm against the full file. */
1479 static int server_open_hostname(Server
*s
) {
1484 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1485 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1486 if (s
->hostname_fd
< 0)
1487 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1489 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1491 /* kernels prior to 3.2 don't support polling this file. Ignore
1494 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1495 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1499 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1502 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1504 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket; server_connect_notify() registers it for EPOLLOUT
 * on s->notify_fd with the Server as userdata.
 *
 * Each invocation handles at most one pending item, in this order:
 *   1. the initial ready/status message, while s->sent_notify_ready is still false;
 *   2. a watchdog message, while s->send_watchdog is set;
 *   3. one queued stdout stream notification (stdout_stream_send_notify()).
 * Messages are sent with send(..., MSG_DONTWAIT). EAGAIN is tested separately from other send
 * errors; the latter are logged and returned through log_error_errno().
 *
 * Afterwards s->send_watchdog and s->stdout_streams_notify_queue are tested ("leave us enabled
 * if there's still more to do"); if neither is set, the event source is switched to
 * SD_EVENT_OFF.
 *
 * NOTE(review): the full contents of the two message buffers, the branch taken on EAGAIN and the
 * return statements are not visible in this listing — confirm against the full file. */
1509 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1510 Server
*s
= userdata
;
1514 assert(s
->notify_event_source
== es
);
1515 assert(s
->notify_fd
== fd
);
1517 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1518 * message on it. Either it's the watchdog event, the initial
1519 * READY=1 event or an stdout stream event. If there's nothing
1520 * to write anymore, turn our event source off. The next time
1521 * there's something to send it will be turned on again. */
1523 if (!s
->sent_notify_ready
) {
1524 static const char p
[] =
1526 "STATUS=Processing requests...";
1529 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1531 if (errno
== EAGAIN
)
1534 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1537 s
->sent_notify_ready
= true;
1538 log_debug("Sent READY=1 notification.");
1540 } else if (s
->send_watchdog
) {
1542 static const char p
[] =
1547 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1549 if (errno
== EAGAIN
)
1552 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1555 s
->send_watchdog
= false;
1556 log_debug("Sent WATCHDOG=1 notification.");
1558 } else if (s
->stdout_streams_notify_queue
)
1559 /* Dispatch one stream notification event */
1560 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1562 /* Leave us enabled if there's still more to do. */
1563 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1566 /* There was nothing to do anymore, let's turn ourselves off. */
1567 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1569 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of s->watchdog_event_source (installed by server_connect_notify()); userdata is
 * the Server, usec is the time value passed in by the event loop.
 *
 * Sets s->send_watchdog so that dispatch_notify_event() sends a watchdog message, switches the
 * notify event source to SD_EVENT_ON (a failure there is only logged as a warning), then re-arms
 * this timer for usec + s->watchdog_usec / 2 and enables it again. Failures to re-arm or
 * re-enable the timer are logged and returned through log_error_errno().
 *
 * NOTE(review): the `if (r < 0)` style guards and the final return are not visible in this
 * listing — confirm against the full file. */
1574 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1575 Server
*s
= userdata
;
1580 s
->send_watchdog
= true;
1582 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1584 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1586 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1588 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1590 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1592 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Sets up the non-blocking connection to the service manager's notification socket.
 *
 * Preconditions (asserted): s->notify_fd < 0 and s->notify_event_source is unset.
 *
 * Steps visible in this listing:
 *  - read $NOTIFY_SOCKET; a value whose first character is neither '@' nor '/', or which has no
 *    second character, is reported as invalid; a value longer than sizeof(sa.un.sun_path) is
 *    reported as too long;
 *  - create an AF_UNIX SOCK_DGRAM socket with SOCK_CLOEXEC|SOCK_NONBLOCK into s->notify_fd;
 *  - raise its send buffer towards NOTIFY_SNDBUF_SIZE via fd_inc_sndbuf() (result discarded);
 *  - copy the path into sa.un.sun_path with strncpy() and replace a leading '@' by a NUL byte;
 *  - connect() the socket and register it for EPOLLOUT with dispatch_notify_event() as callback;
 *  - if sd_watchdog_enabled() reports a watchdog (> 0, interval stored in s->watchdog_usec), set
 *    s->send_watchdog and add a CLOCK_MONOTONIC timer (dispatch_watchdog) that first fires at
 *    now + watchdog_usec/2, with watchdog_usec/4 passed as accuracy.
 *
 * The long in-code comment explains why sd_notify() is not used: it is synchronous, and blocking
 * on PID 1 could close a wait cycle PID 1 -> dbus-daemon -> journald (see systemd issue #1505).
 *
 * NOTE(review): strncpy() with the full buffer size leaves sun_path without a terminating NUL
 * when the path is exactly sizeof(sun_path) bytes long; presumably SOCKADDR_UN_LEN() copes with
 * that — verify against its definition. The declarations, the handling of an unset
 * $NOTIFY_SOCKET, the error guards and the return values are not visible in this listing. */
1597 static int server_connect_notify(Server
*s
) {
1598 union sockaddr_union sa
= {
1599 .un
.sun_family
= AF_UNIX
,
1605 assert(s
->notify_fd
< 0);
1606 assert(!s
->notify_event_source
);
1609 So here's the problem: we'd like to send notification
1610 messages to PID 1, but we cannot do that via sd_notify(),
1611 since that's synchronous, and we might end up blocking on
1612 it. Specifically: given that PID 1 might block on
1613 dbus-daemon during IPC, and dbus-daemon is logging to us,
1614 and might hence block on us, we might end up in a deadlock
1615 if we block on sending PID 1 notification messages — by
1616 generating a full blocking circle. To avoid this, let's
1617 create a non-blocking socket, and connect it to the
1618 notification socket, and then wait for POLLOUT before we
1619 send anything. This should efficiently avoid any deadlocks,
1620 as we'll never block on PID 1, hence PID 1 can safely block
1621 on dbus-daemon which can safely block on us again.
1623 Don't think that this issue is real? It is, see:
1624 https://github.com/systemd/systemd/issues/1505
1627 e
= getenv("NOTIFY_SOCKET");
1631 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1632 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1636 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1637 log_error("NOTIFY_SOCKET path too long: %s", e
);
1641 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1642 if (s
->notify_fd
< 0)
1643 return log_error_errno(errno
, "Failed to create notify socket: %m");
1645 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1647 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1648 if (sa
.un
.sun_path
[0] == '@')
1649 sa
.un
.sun_path
[0] = 0;
1651 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1653 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1655 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1657 return log_error_errno(r
, "Failed to watch notification socket: %m");
1659 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1660 s
->send_watchdog
= true;
1662 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1664 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1667 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server: fills in defaults, applies configuration, adopts or opens the input
 * sockets and finally opens the journal. Returns the result of system_journal_open() on the
 * success path.
 *
 * Order of operations visible in this listing:
 *  1. Set all fds to -1 and establish defaults (compression on, kmsg reading on, sync interval,
 *     rate limit, forward-to-wall on, max file age, per-target max levels, line_max) and reset
 *     the metrics of both storages.
 *  2. Apply journald.conf (server_parse_config_file(); its result is not used here) and the
 *     kernel command line (parse_proc_cmdline_item(); failures are logged and ignored).
 *  3. If exactly one of rate_limit_interval / rate_limit_burst is zero, set both to zero.
 *  4. Create /run/systemd/journal (best effort) and allocate the user journal hashmap, the mmap
 *     cache, the deferred-closes set and the default event loop.
 *  5. Walk the fds passed in via sd_listen_fds(): native, stdout, syslog and audit sockets are
 *     recognized by type/path, each at most once ("Too many ... sockets passed."); fds are also
 *     put into an FDSet that is offered to server_restore_streams(), and whatever is left in it
 *     afterwards is reported as unknown and closed.
 *  6. Open the stdout, syslog, native and kmsg inputs; open audit only if an audit fd was passed
 *     or no sockets were passed at all; then the kernel seqnum, the hostname fd and the signals.
 *  7. Create the udev context and the rate limiter, query the cgroup root, cache hostname, boot
 *     ID and machine ID, and build the runtime/system storage paths below /run/log/journal/ and
 *     /var/log/journal/.
 *  8. Connect to the notification socket and acquire the default client context (both best
 *     effort), then open the system journal.
 *
 * NOTE(review): local declarations, most `if (r < 0)` guards, the bodies of the per-socket
 * branches (where the fd is stored) and the out-of-memory returns are not visible in this
 * listing (the embedded line numbers skip) — confirm the error handling against the full file. */
1673 int server_init(Server
*s
) {
1674 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* Every file descriptor starts out as -1, i.e. not open. */
1681 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1682 s
->compress
.enabled
= true;
1683 s
->compress
.threshold_bytes
= (uint64_t) -1;
1685 s
->read_kmsg
= true;
1687 s
->watchdog_usec
= USEC_INFINITY
;
1689 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1690 s
->sync_scheduled
= false;
1692 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1693 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1695 s
->forward_to_wall
= true;
1697 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
/* Default maximum log level per output target. */
1699 s
->max_level_store
= LOG_DEBUG
;
1700 s
->max_level_syslog
= LOG_DEBUG
;
1701 s
->max_level_kmsg
= LOG_NOTICE
;
1702 s
->max_level_console
= LOG_INFO
;
1703 s
->max_level_wall
= LOG_EMERG
;
1705 s
->line_max
= DEFAULT_LINE_MAX
;
1707 journal_reset_metrics(&s
->system_storage
.metrics
);
1708 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* Configuration sources, applied in this order: journald.conf, then the kernel command line. */
1710 server_parse_config_file(s
);
1712 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1714 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* True when exactly one of interval and burst is non-zero; both are then reset to 0. */
1716 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1717 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1718 s
->rate_limit_interval
, s
->rate_limit_burst
);
1719 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1722 (void) mkdir_p("/run/systemd/journal", 0755);
1724 s
->user_journals
= ordered_hashmap_new(NULL
);
1725 if (!s
->user_journals
)
1728 s
->mmap
= mmap_cache_new();
1732 s
->deferred_closes
= set_new(NULL
);
1733 if (!s
->deferred_closes
)
1736 r
= sd_event_default(&s
->event
);
1738 return log_error_errno(r
, "Failed to create event loop: %m");
1740 n
= sd_listen_fds(true);
1742 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed-in fd by socket type and bound path. */
1744 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1746 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1748 if (s
->native_fd
>= 0) {
1749 log_error("Too many native sockets passed.");
1755 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1757 if (s
->stdout_fd
>= 0) {
1758 log_error("Too many stdout sockets passed.");
1764 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1765 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1767 if (s
->syslog_fd
>= 0) {
1768 log_error("Too many /dev/log sockets passed.");
1774 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1776 if (s
->audit_fd
>= 0) {
1777 log_error("Too many audit sockets passed.");
1791 r
= fdset_put(fds
, fd
);
1797 /* Try to restore streams, but don't bother if this fails */
1798 (void) server_restore_streams(s
, fds
);
1800 if (fdset_size(fds
) > 0) {
1801 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1802 fds
= fdset_free(fds
);
/* Remember whether none of the four socket kinds was passed in; this decides about audit below. */
1805 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1807 /* always open stdout, syslog, native, and kmsg sockets */
1809 /* systemd-journald.socket: /run/systemd/journal/stdout */
1810 r
= server_open_stdout_socket(s
);
1814 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1815 r
= server_open_syslog_socket(s
);
1819 /* systemd-journald.socket: /run/systemd/journal/socket */
1820 r
= server_open_native_socket(s
);
1825 r
= server_open_dev_kmsg(s
);
1829 /* Unless we got *some* sockets and not audit, open audit socket */
1830 if (s
->audit_fd
>= 0 || no_sockets
) {
1831 r
= server_open_audit(s
);
1836 r
= server_open_kernel_seqnum(s
);
1840 r
= server_open_hostname(s
);
1844 r
= setup_signals(s
);
1848 s
->udev
= udev_new();
1852 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1856 r
= cg_get_root_path(&s
->cgroup_root
);
1860 server_cache_hostname(s
);
1861 server_cache_boot_id(s
);
1862 server_cache_machine_id(s
);
1864 s
->runtime_storage
.name
= "Runtime journal";
1865 s
->system_storage
.name
= "System journal";
/* Both storage paths are heap-allocated by strjoin(); server_done() frees them. */
1867 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1868 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1869 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1872 (void) server_connect_notify(s
);
1874 (void) client_context_acquire_default(s
);
1876 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME timestamp on the system
 * journal (if one is open) and on every file in s->user_journals.
 *
 * NOTE(review): original lines 1880-1884 are not part of this listing; they presumably hold the
 * local declarations (f, i, n) and possibly a build-time guard — confirm against the full
 * file. */
1879 void server_maybe_append_tags(Server
*s
) {
1885 n
= now(CLOCK_REALTIME
);
1887 if (s
->system_journal
)
1888 journal_file_maybe_append_tag(s
->system_journal
, n
);
1890 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1891 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server owns; the counterpart of server_init().
 *
 * Order visible in this listing: deferred journal closes, stdout streams, client contexts, the
 * system/runtime/user journal files, all event sources and the event loop, all file descriptors,
 * the rate limiter, the kernel_seqnum mapping, heap-allocated strings, the mmap cache and the
 * udev context.
 *
 * NOTE(review): several original lines (e.g. 1896-1897, 1936-1937, 1942-1944, 1949-1950) are not
 * part of this listing, so further cleanup steps may exist that cannot be seen here. */
1895 void server_done(Server
*s
) {
1898 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
/* stdout_stream_free() is called on the list head until s->stdout_streams becomes NULL. */
1900 while (s
->stdout_streams
)
1901 stdout_stream_free(s
->stdout_streams
);
1903 client_context_flush_all(s
);
1905 if (s
->system_journal
)
1906 (void) journal_file_close(s
->system_journal
);
1908 if (s
->runtime_journal
)
1909 (void) journal_file_close(s
->runtime_journal
);
1911 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
/* Drop every event source first, then the event loop itself. */
1913 sd_event_source_unref(s
->syslog_event_source
);
1914 sd_event_source_unref(s
->native_event_source
);
1915 sd_event_source_unref(s
->stdout_event_source
);
1916 sd_event_source_unref(s
->dev_kmsg_event_source
);
1917 sd_event_source_unref(s
->audit_event_source
);
1918 sd_event_source_unref(s
->sync_event_source
);
1919 sd_event_source_unref(s
->sigusr1_event_source
);
1920 sd_event_source_unref(s
->sigusr2_event_source
);
1921 sd_event_source_unref(s
->sigterm_event_source
);
1922 sd_event_source_unref(s
->sigint_event_source
);
1923 sd_event_source_unref(s
->sigrtmin1_event_source
);
1924 sd_event_source_unref(s
->hostname_event_source
);
1925 sd_event_source_unref(s
->notify_event_source
);
1926 sd_event_source_unref(s
->watchdog_event_source
);
1927 sd_event_unref(s
->event
);
/* Close every file descriptor the server may hold. */
1929 safe_close(s
->syslog_fd
);
1930 safe_close(s
->native_fd
);
1931 safe_close(s
->stdout_fd
);
1932 safe_close(s
->dev_kmsg_fd
);
1933 safe_close(s
->audit_fd
);
1934 safe_close(s
->hostname_fd
);
1935 safe_close(s
->notify_fd
);
1938 journal_rate_limit_free(s
->rate_limit
);
/* The kernel sequence number mapping is released with the size of a single uint64_t. */
1940 if (s
->kernel_seqnum
)
1941 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1945 free(s
->cgroup_root
);
1946 free(s
->hostname_field
);
1947 free(s
->runtime_storage
.path
);
1948 free(s
->system_storage
.path
);
1951 mmap_cache_unref(s
->mmap
);
1953 udev_unref(s
->udev
);
/* String names of the Storage enum values ("auto", "volatile", "persistent", "none"), indexed by
 * enum value. The two macro invocations below use this table: DEFINE_STRING_TABLE_LOOKUP()
 * presumably generates the storage <-> string conversion helpers, and DEFINE_CONFIG_PARSE_ENUM()
 * defines config_parse_storage, whose error text is "Failed to parse storage setting" — verify
 * against the macro definitions. */
1956 static const char* const storage_table
[_STORAGE_MAX
] = {
1957 [STORAGE_AUTO
] = "auto",
1958 [STORAGE_VOLATILE
] = "volatile",
1959 [STORAGE_PERSISTENT
] = "persistent",
1960 [STORAGE_NONE
] = "none"
1963 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1964 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values ("login", "uid", "none"), indexed by enum value.
 * The two macro invocations below use this table: DEFINE_STRING_TABLE_LOOKUP() presumably
 * generates the split_mode <-> string conversion helpers, and DEFINE_CONFIG_PARSE_ENUM() defines
 * config_parse_split_mode, whose error text is "Failed to parse split mode setting" — verify
 * against the macro definitions. */
1966 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1967 [SPLIT_LOGIN
] = "login",
1968 [SPLIT_UID
] = "uid",
1969 [SPLIT_NONE
] = "none",
1972 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1973 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Config parser callback for the LineMax= setting; the result is stored through *sz.
 *
 *  - An empty value selects DEFAULT_LINE_MAX.
 *  - Otherwise the value is parsed with parse_size() using base 1024; a parse failure is logged
 *    at LOG_ERR and the assignment is ignored.
 *  - Values that are too small are clamped to 79 (one less than the traditional 80-column
 *    terminal width), values above SSIZE_MAX-1 are clamped to SSIZE_MAX-1 (room for a trailing
 *    NUL byte); both cases log a LOG_WARNING.
 *
 * NOTE(review): most of the parameter list, the declarations of sz, v and r, the comparison
 * that detects "too small", the assignments in the clamping branches and the return value are
 * not visible in this listing — confirm against the full file. */
1975 int config_parse_line_max(
1977 const char *filename
,
1979 const char *section
,
1980 unsigned section_line
,
1995 if (isempty(rvalue
))
1996 /* Empty assignment means default */
1997 *sz
= DEFAULT_LINE_MAX
;
2001 r
= parse_size(rvalue
, 1024, &v
);
2003 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2008 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2009 * terminal size is 80ch, and it might make sense to break one character before the natural
2010 * line break would occur on that. */
2011 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2013 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2014 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2015 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2016 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2017 * fail much earlier anyway. */
2018 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
/* Config parser callback for the Compress= setting; `data` points to the JournalCompressOptions
 * to update.
 *
 * The value is interpreted by the first matching rule:
 *   "1"            -> warn about the ambiguity, enable compression (threshold left untouched)
 *   "0"            -> warn about the ambiguity, disable compression
 *   boolean        -> enabled = result of parse_boolean()
 *   size           -> parse_size() (base 1024) stores the threshold, compression is enabled
 *   empty string   -> compression enabled, threshold reset to (uint64_t) -1
 *   anything else  -> LOG_ERR "Failed to parse Compress= value, ignoring"
 *
 * The r passed to the final log_syntax() is the one assigned from parse_boolean(); the result of
 * parse_size() is only compared against 0.
 *
 * NOTE(review): part of the parameter list, the declaration of r, the `else` introducing the
 * final log_syntax() and the return statement are not visible in this listing, which ends here —
 * confirm against the full file. */
2027 int config_parse_compress(const char* unit
,
2028 const char *filename
,
2030 const char *section
,
2031 unsigned section_line
,
2037 JournalCompressOptions
* compress
= data
;
2040 if (streq(rvalue
, "1")) {
2041 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2042 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2043 compress
->enabled
= true;
2044 } else if (streq(rvalue
, "0")) {
2045 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2046 "Compress= ambiguously specified as 0, disabling compression");
2047 compress
->enabled
= false;
2048 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2049 compress
->enabled
= r
;
2050 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2051 compress
->enabled
= true;
2052 else if (isempty(rvalue
)) {
2053 compress
->enabled
= true;
2054 compress
->threshold_bytes
= (uint64_t) -1;
2056 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);