/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "format-util.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-context.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
64 #include "parse-util.h"
65 #include "proc-cmdline.h"
66 #include "process-util.h"
68 #include "selinux-util.h"
69 #include "signal-util.h"
70 #include "socket-util.h"
71 #include "stdio-util.h"
72 #include "string-table.h"
73 #include "string-util.h"
74 #include "syslog-util.h"
75 #include "user-util.h"
/* Upper bound on the number of per-user journal files kept open at once
 * (see the eviction loop in find_journal()). */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached disk space measurement stays valid before
 * cache_space_refresh() measures again. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
95 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
96 _cleanup_closedir_
DIR *d
= NULL
;
105 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
106 errno
, "Failed to open %s: %m", path
);
108 if (fstatvfs(dirfd(d
), &ss
) < 0)
109 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
111 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
113 FOREACH_DIRENT_ALL(de
, d
, break) {
116 if (!endswith(de
->d_name
, ".journal") &&
117 !endswith(de
->d_name
, ".journal~"))
120 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
121 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
125 if (!S_ISREG(st
.st_mode
))
128 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
134 static void cache_space_invalidate(JournalStorageSpace
*space
) {
135 memset(space
, 0, sizeof(*space
));
138 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
139 JournalStorageSpace
*space
;
140 JournalMetrics
*metrics
;
141 uint64_t vfs_used
, vfs_avail
, avail
;
147 metrics
= &storage
->metrics
;
148 space
= &storage
->space
;
150 ts
= now(CLOCK_MONOTONIC
);
152 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
155 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
159 space
->vfs_used
= vfs_used
;
160 space
->vfs_available
= vfs_avail
;
162 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
164 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
165 space
->available
= LESS_BY(space
->limit
, vfs_used
);
166 space
->timestamp
= ts
;
170 static void patch_min_use(JournalStorage
*storage
) {
173 /* Let's bump the min_use limit to the current usage on disk. We do
174 * this when starting up and first opening the journal files. This way
175 * sudden spikes in disk usage will not cause journald to vacuum files
176 * without bounds. Note that this means that only a restart of journald
177 * will make it reset this value. */
179 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
183 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
189 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
191 r
= cache_space_refresh(s
, js
);
194 *available
= js
->space
.available
;
196 *limit
= js
->space
.limit
;
201 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
202 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
203 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
204 JournalMetrics
*metrics
;
209 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
211 if (cache_space_refresh(s
, storage
) < 0)
214 metrics
= &storage
->metrics
;
215 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
216 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
217 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
218 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
219 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
220 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
222 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
223 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
224 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
225 "JOURNAL_NAME=%s", storage
->name
,
226 "JOURNAL_PATH=%s", storage
->path
,
227 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
228 "CURRENT_USE_PRETTY=%s", fb1
,
229 "MAX_USE=%"PRIu64
, metrics
->max_use
,
230 "MAX_USE_PRETTY=%s", fb2
,
231 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
232 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
233 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
234 "DISK_AVAILABLE_PRETTY=%s", fb4
,
235 "LIMIT=%"PRIu64
, storage
->space
.limit
,
236 "LIMIT_PRETTY=%s", fb5
,
237 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
238 "AVAILABLE_PRETTY=%s", fb6
,
242 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
249 if (uid
<= SYSTEM_UID_MAX
)
252 r
= add_acls_for_user(f
->fd
, uid
);
254 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
258 static int open_journal(
264 JournalMetrics
*metrics
,
274 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
276 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
280 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
282 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        return access("/run/systemd/journal/flushed", F_OK) >= 0;
}
294 static int system_journal_open(Server
*s
, bool flush_requested
) {
298 if (!s
->system_journal
&&
299 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
300 (flush_requested
|| flushed_flag_is_set())) {
302 /* If in auto mode: first try to create the machine
303 * path, but not the prefix.
305 * If in persistent mode: create /var/log/journal and
306 * the machine path */
308 if (s
->storage
== STORAGE_PERSISTENT
)
309 (void) mkdir_p("/var/log/journal/", 0755);
311 (void) mkdir(s
->system_storage
.path
, 0755);
313 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
314 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
316 server_add_acls(s
->system_journal
, 0);
317 (void) cache_space_refresh(s
, &s
->system_storage
);
318 patch_min_use(&s
->system_storage
);
320 if (!IN_SET(r
, -ENOENT
, -EROFS
))
321 log_warning_errno(r
, "Failed to open system journal: %m");
326 /* If the runtime journal is open, and we're post-flush, we're
327 * recovering from a failed system journal rotate (ENOSPC)
328 * for which the runtime journal was reopened.
330 * Perform an implicit flush to var, leaving the runtime
331 * journal closed, now that the system journal is back.
333 if (!flush_requested
)
334 (void) server_flush_to_var(s
, true);
337 if (!s
->runtime_journal
&&
338 (s
->storage
!= STORAGE_NONE
)) {
340 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
342 if (s
->system_journal
) {
344 /* Try to open the runtime journal, but only
345 * if it already exists, so that we can flush
346 * it into the system journal */
348 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
351 log_warning_errno(r
, "Failed to open runtime journal: %m");
358 /* OK, we really need the runtime journal, so create
359 * it if necessary. */
361 (void) mkdir("/run/log", 0755);
362 (void) mkdir("/run/log/journal", 0755);
363 (void) mkdir_parents(fn
, 0750);
365 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
367 return log_error_errno(r
, "Failed to open runtime journal: %m");
370 if (s
->runtime_journal
) {
371 server_add_acls(s
->runtime_journal
, 0);
372 (void) cache_space_refresh(s
, &s
->runtime_storage
);
373 patch_min_use(&s
->runtime_storage
);
380 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
381 _cleanup_free_
char *p
= NULL
;
388 /* A rotate that fails to create the new journal (ENOSPC) leaves the
389 * rotated journal as NULL. Unless we revisit opening, even after
390 * space is made available we'll continue to return NULL indefinitely.
392 * system_journal_open() is a noop if the journals are already open, so
393 * we can just call it here to recover from failed rotates (or anything
394 * else that's left the journals as NULL).
396 * Fixes https://github.com/systemd/systemd/issues/3968 */
397 (void) system_journal_open(s
, false);
399 /* We split up user logs only on /var, not on /run. If the
400 * runtime file is open, we write to it exclusively, in order
401 * to guarantee proper order as soon as we flush /run to
402 * /var and close the runtime file. */
404 if (s
->runtime_journal
)
405 return s
->runtime_journal
;
407 if (uid
<= SYSTEM_UID_MAX
|| uid_is_dynamic(uid
))
408 return s
->system_journal
;
410 r
= sd_id128_get_machine(&machine
);
412 return s
->system_journal
;
414 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
418 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
419 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
420 return s
->system_journal
;
422 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
423 /* Too many open? Then let's close one */
424 f
= ordered_hashmap_steal_first(s
->user_journals
);
426 (void) journal_file_close(f
);
429 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
431 return s
->system_journal
;
433 server_add_acls(f
, uid
);
435 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
437 (void) journal_file_close(f
);
438 return s
->system_journal
;
444 static int do_rotate(
457 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
460 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
462 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
464 server_add_acls(*f
, uid
);
469 void server_rotate(Server
*s
) {
475 log_debug("Rotating...");
477 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
478 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
480 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
481 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
483 ordered_hashmap_replace(s
->user_journals
, k
, f
);
485 /* Old file has been closed and deallocated */
486 ordered_hashmap_remove(s
->user_journals
, k
);
489 /* Perform any deferred closes which aren't still offlining. */
490 SET_FOREACH(f
, s
->deferred_closes
, i
)
491 if (!journal_file_is_offlining(f
)) {
492 (void) set_remove(s
->deferred_closes
, f
);
493 (void) journal_file_close(f
);
497 void server_sync(Server
*s
) {
502 if (s
->system_journal
) {
503 r
= journal_file_set_offline(s
->system_journal
, false);
505 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
508 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
509 r
= journal_file_set_offline(f
, false);
511 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
514 if (s
->sync_event_source
) {
515 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
517 log_error_errno(r
, "Failed to disable sync timer source: %m");
520 s
->sync_scheduled
= false;
523 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
530 (void) cache_space_refresh(s
, storage
);
533 server_space_usage_message(s
, storage
);
535 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
536 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
537 &s
->oldest_file_usec
, verbose
);
538 if (r
< 0 && r
!= -ENOENT
)
539 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
541 cache_space_invalidate(&storage
->space
);
544 int server_vacuum(Server
*s
, bool verbose
) {
547 log_debug("Vacuuming...");
549 s
->oldest_file_usec
= 0;
551 if (s
->system_journal
)
552 do_vacuum(s
, &s
->system_storage
, verbose
);
553 if (s
->runtime_journal
)
554 do_vacuum(s
, &s
->runtime_storage
, verbose
);
559 static void server_cache_machine_id(Server
*s
) {
565 r
= sd_id128_get_machine(&id
);
569 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
572 static void server_cache_boot_id(Server
*s
) {
578 r
= sd_id128_get_boot(&id
);
582 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
585 static void server_cache_hostname(Server
*s
) {
586 _cleanup_free_
char *t
= NULL
;
591 t
= gethostname_malloc();
595 x
= strappend("_HOSTNAME=", t
);
599 free(s
->hostname_field
);
600 s
->hostname_field
= x
;
603 static bool shall_try_append_again(JournalFile
*f
, int r
) {
606 case -E2BIG
: /* Hit configured limit */
607 case -EFBIG
: /* Hit fs limit */
608 case -EDQUOT
: /* Quota limit hit */
609 case -ENOSPC
: /* Disk full */
610 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
613 case -EIO
: /* I/O error of some kind (mmap) */
614 log_warning("%s: IO error, rotating.", f
->path
);
617 case -EHOSTDOWN
: /* Other machine */
618 log_info("%s: Journal file from other machine, rotating.", f
->path
);
621 case -EBUSY
: /* Unclean shutdown */
622 log_info("%s: Unclean shutdown, rotating.", f
->path
);
625 case -EPROTONOSUPPORT
: /* Unsupported feature */
626 log_info("%s: Unsupported feature, rotating.", f
->path
);
629 case -EBADMSG
: /* Corrupted */
630 case -ENODATA
: /* Truncated */
631 case -ESHUTDOWN
: /* Already archived */
632 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
635 case -EIDRM
: /* Journal file has been deleted */
636 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
639 case -ETXTBSY
: /* Journal file is from the future */
640 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
648 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
649 bool vacuumed
= false, rotate
= false;
650 struct dual_timestamp ts
;
658 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
659 * the source time, and not even the time the event was originally seen, but instead simply the time we started
660 * processing it, as we want strictly linear ordering in what we write out.) */
661 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
662 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
664 if (ts
.realtime
< s
->last_realtime_clock
) {
665 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
666 * regular operation. However, when it does happen, then we should make sure that we start fresh files
667 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
668 * bisection works correctly. */
670 log_debug("Time jumped backwards, rotating.");
674 f
= find_journal(s
, uid
);
678 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
679 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
686 server_vacuum(s
, false);
689 f
= find_journal(s
, uid
);
694 s
->last_realtime_clock
= ts
.realtime
;
696 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
698 server_schedule_sync(s
, priority
);
702 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
703 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
708 server_vacuum(s
, false);
710 f
= find_journal(s
, uid
);
714 log_debug("Retrying write.");
715 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
717 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
719 server_schedule_sync(s
, priority
);
/* Helpers that append one "FIELD=value" string to iovec[] and advance n, but
 * only if the value is considered set. "field" must be a string literal
 * WITHOUT the trailing "=": each macro appends the "=" itself. The strings
 * are allocated on the caller's stack (newa()/strjoina()), so they stay valid
 * until the calling function returns. Wrapped in do { } while (0) so that
 * each invocation behaves like a single statement. */

/* Numeric value, formatted with "format"; added only if isset(value) is true */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* NUL-terminated string value; added only if it is not empty */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* 128 bit ID value; added only if it is not the null ID */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* Value given as pointer plus size (not necessarily NUL-terminated); added
 * only if the size is non-zero. A terminating NUL is appended to the copy. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)
753 static void dispatch_message_real(
755 struct iovec
*iovec
, unsigned n
, unsigned m
,
756 const ClientContext
*c
,
757 const struct timeval
*tv
,
761 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
768 assert(n
+ N_IOVEC_META_FIELDS
+ (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
771 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
772 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
773 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
775 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
776 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
777 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
778 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
780 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
782 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
785 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
786 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
787 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
789 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
793 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
798 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
800 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
801 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
802 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
804 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
805 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
806 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
807 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
809 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
811 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
812 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
814 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
815 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
816 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
817 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
818 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
819 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
820 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
822 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
828 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
829 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
832 /* Note that strictly speaking storing the boot id here is
833 * redundant since the entry includes this in-line
834 * anyway. However, we need this indexed, too. */
835 if (!isempty(s
->boot_id_field
))
836 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
838 if (!isempty(s
->machine_id_field
))
839 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
841 if (!isempty(s
->hostname_field
))
842 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
846 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
847 /* Split up strictly by (non-root) UID */
848 journal_uid
= c
->uid
;
849 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
850 /* Split up by login UIDs. We do this only if the
851 * realuid is not root, in order not to accidentally
852 * leak privileged information to the user that is
853 * logged by a privileged process that is part of an
854 * unprivileged session. */
855 journal_uid
= c
->owner_uid
;
859 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
862 void server_driver_message(Server
*s
, const char *message_id
, const char *format
, ...) {
864 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
872 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
873 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
874 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
876 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
877 assert_cc(6 == LOG_INFO
);
878 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
881 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
884 va_start(ap
, format
);
885 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
886 /* Error handling below */
890 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), s
->my_context
, NULL
, LOG_INFO
, 0);
893 free(iovec
[m
++].iov_base
);
896 /* We failed to format the message. Emit a warning instead. */
899 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
902 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
903 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
904 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), s
->my_context
, NULL
, LOG_INFO
, 0);
908 void server_dispatch_message(
910 struct iovec
*iovec
, unsigned n
, unsigned m
,
912 const struct timeval
*tv
,
916 uint64_t available
= 0;
920 assert(iovec
|| n
== 0);
925 if (LOG_PRI(priority
) > s
->max_level_store
)
928 /* Stop early in case the information will not be stored
930 if (s
->storage
== STORAGE_NONE
)
934 (void) determine_space(s
, &available
, NULL
);
936 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
940 /* Write a suppression message if we suppressed something */
942 server_driver_message(s
, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
943 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, c
->unit
),
947 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
950 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
952 sd_journal
*j
= NULL
;
953 char ts
[FORMAT_TIMESPAN_MAX
];
960 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
963 if (!s
->runtime_journal
)
966 if (require_flag_file
&& !flushed_flag_is_set())
969 (void) system_journal_open(s
, true);
971 if (!s
->system_journal
)
974 log_debug("Flushing to /var...");
976 start
= now(CLOCK_MONOTONIC
);
978 r
= sd_id128_get_machine(&machine
);
982 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
984 return log_error_errno(r
, "Failed to read runtime journal: %m");
986 sd_journal_set_data_threshold(j
, 0);
988 SD_JOURNAL_FOREACH(j
) {
993 assert(f
&& f
->current_offset
> 0);
997 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
999 log_error_errno(r
, "Can't read entry: %m");
1003 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1007 if (!shall_try_append_again(s
->system_journal
, r
)) {
1008 log_error_errno(r
, "Can't write entry: %m");
1013 server_vacuum(s
, false);
1015 if (!s
->system_journal
) {
1016 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1021 log_debug("Retrying write.");
1022 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1024 log_error_errno(r
, "Can't write entry: %m");
1032 journal_file_post_change(s
->system_journal
);
1034 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1037 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1039 sd_journal_close(j
);
1041 server_driver_message(s
, NULL
,
1042 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1043 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1050 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1051 Server
*s
= userdata
;
1052 struct ucred
*ucred
= NULL
;
1053 struct timeval
*tv
= NULL
;
1054 struct cmsghdr
*cmsg
;
1056 size_t label_len
= 0, m
;
1059 int *fds
= NULL
, v
= 0;
1063 struct cmsghdr cmsghdr
;
1065 /* We use NAME_MAX space for the SELinux label
1066 * here. The kernel currently enforces no
1067 * limit, but according to suggestions from
1068 * the SELinux people this will change and it
1069 * will probably be identical to NAME_MAX. For
1070 * now we use that, but this should be updated
1071 * one day when the final limit is known. */
1072 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1073 CMSG_SPACE(sizeof(struct timeval
)) +
1074 CMSG_SPACE(sizeof(int)) + /* fd */
1075 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1078 union sockaddr_union sa
= {};
1080 struct msghdr msghdr
= {
1083 .msg_control
= &control
,
1084 .msg_controllen
= sizeof(control
),
1086 .msg_namelen
= sizeof(sa
),
1090 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1092 if (revents
!= EPOLLIN
) {
1093 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1097 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1099 (void) ioctl(fd
, SIOCINQ
, &v
);
1101 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1102 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1104 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1106 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1109 iovec
.iov_base
= s
->buffer
;
1110 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1112 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1114 if (IN_SET(errno
, EINTR
, EAGAIN
))
1117 return log_error_errno(errno
, "recvmsg() failed: %m");
1120 CMSG_FOREACH(cmsg
, &msghdr
) {
1122 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1123 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1124 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1125 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1126 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1127 cmsg
->cmsg_type
== SCM_SECURITY
) {
1128 label
= (char*) CMSG_DATA(cmsg
);
1129 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1130 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1131 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1132 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1133 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1134 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1135 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1136 fds
= (int*) CMSG_DATA(cmsg
);
1137 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1141 /* And a trailing NUL, just in case */
1144 if (fd
== s
->syslog_fd
) {
1145 if (n
> 0 && n_fds
== 0)
1146 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1148 log_warning("Got file descriptors via syslog socket. Ignoring.");
1150 } else if (fd
== s
->native_fd
) {
1151 if (n
> 0 && n_fds
== 0)
1152 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1153 else if (n
== 0 && n_fds
== 1)
1154 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1156 log_warning("Got too many file descriptors via native socket. Ignoring.");
1159 assert(fd
== s
->audit_fd
);
1161 if (n
> 0 && n_fds
== 0)
1162 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1164 log_warning("Got file descriptors via audit socket. Ignoring.");
1167 close_many(fds
, n_fds
);
1171 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1172 Server
*s
= userdata
;
1177 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1179 (void) server_flush_to_var(s
, false);
1181 server_vacuum(s
, false);
1183 r
= touch("/run/systemd/journal/flushed");
1185 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1187 server_space_usage_message(s
, NULL
);
1191 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1192 Server
*s
= userdata
;
1197 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1199 server_vacuum(s
, true);
1201 if (s
->system_journal
)
1202 patch_min_use(&s
->system_storage
);
1203 if (s
->runtime_journal
)
1204 patch_min_use(&s
->runtime_storage
);
1206 /* Let clients know when the most recent rotation happened. */
1207 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1209 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* sd-event signal handler used for both SIGTERM and SIGINT (see setup_signals()); userdata is the
 * Server. Logs the received signal at LOG_INFO and asks the event loop to exit with code 0. */
1214 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1215 Server
*s
= userdata
;
1219 log_received_signal(LOG_INFO
, si
);
1221 sd_event_exit(s
->event
, 0);
/* sd-event signal handler for SIGRTMIN+1 (registered in setup_signals()); userdata is the Server.
 * Logs the sync request at debug level with the sender PID and records a CLOCK_MONOTONIC
 * timestamp in /run/systemd/journal/synced; a failure to write that file is only warned about.
 * NOTE(review): the statements between the visible lines are not shown in this view. */
1225 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1226 Server
*s
= userdata
;
1231 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1235 /* Let clients know when the most recent sync happened. */
1236 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1238 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs all signal handling of the server on s->event.
 *
 * Applies a signal mask covering SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1 and then adds one
 * sd-event signal source per signal, storing each source in the matching s->*_event_source field:
 *   SIGUSR1     -> dispatch_sigusr1
 *   SIGUSR2     -> dispatch_sigusr2
 *   SIGTERM     -> dispatch_sigterm   (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGINT      -> dispatch_sigterm   (priority SD_EVENT_PRIORITY_NORMAL+20)
 *   SIGRTMIN+1  -> dispatch_sigrtmin1 (priority SD_EVENT_PRIORITY_NORMAL+15)
 *
 * NOTE(review): the checks of r after each call and the return statement are not visible in this view. */
1243 static int setup_signals(Server
*s
) {
/* assert_se() keeps the call in place even when assertions are compiled out (TODO confirm macro). */
1248 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1250 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1254 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1258 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1262 /* Let's process SIGTERM late, so that we flush all queued
1263 * messages to disk before we exit */
1264 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1268 /* When journald is invoked on the terminal (when debugging),
1269 * it's useful if C-c is handled equivalent to SIGTERM. */
1270 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
/* Same late priority as the SIGTERM source above. */
1274 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1278 /* SIGRTMIN+1 causes an immediate sync. We process this very
1279 * late, so that everything else queued at this point is
1280 * really written to disk. Clients can watch
1281 * /run/systemd/journal/synced with inotify until its mtime
1282 * changes to see when a sync happened. */
1283 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1287 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init(), which passes the Server as the
 * data argument. Each call receives one kernel command line key and its optional value.
 *
 * Recognized keys:
 *   systemd.journald.forward_to_{syslog,kmsg,console,wall}        boolean switches
 *   systemd.journald.max_level_{console,store,syslog,kmsg,wall}   log level names
 * Any other key starting with "systemd.journald" only produces a warning.
 *
 * NOTE(review): the declaration of s and r, the checks of r that guard the warnings and the return
 * statements are not visible in this view; presumably s is derived from data. */
1294 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1300 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
/* For the boolean switches a key without a value counts as true. */
1302 r
= value
? parse_boolean(value
) : true;
1304 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1306 s
->forward_to_syslog
= r
;
1308 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1310 r
= value
? parse_boolean(value
) : true;
1312 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1314 s
->forward_to_kmsg
= r
;
1316 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1318 r
= value
? parse_boolean(value
) : true;
1320 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1322 s
->forward_to_console
= r
;
1324 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1326 r
= value
? parse_boolean(value
) : true;
1328 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1330 s
->forward_to_wall
= r
;
1332 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
/* The max_level_* keys first go through proc_cmdline_value_missing() and are then
 * parsed with log_level_from_string(). */
1334 if (proc_cmdline_value_missing(key
, value
))
1337 r
= log_level_from_string(value
);
1339 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1341 s
->max_level_console
= r
;
1343 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1345 if (proc_cmdline_value_missing(key
, value
))
1348 r
= log_level_from_string(value
);
1350 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1352 s
->max_level_store
= r
;
1354 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1356 if (proc_cmdline_value_missing(key
, value
))
1359 r
= log_level_from_string(value
);
1361 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1363 s
->max_level_syslog
= r
;
1365 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1367 if (proc_cmdline_value_missing(key
, value
))
1370 r
= log_level_from_string(value
);
1372 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1374 s
->max_level_kmsg
= r
;
1376 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1378 if (proc_cmdline_value_missing(key
, value
))
1381 r
= log_level_from_string(value
);
1383 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1385 s
->max_level_wall
= r
;
/* Catch-all for keys in the journald namespace that none of the branches above matched. */
1387 } else if (startswith(key
, "systemd.journald"))
1388 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1390 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration: returns the result of config_parse_many_nulstr() for
 * PKGSYSCONFDIR "/journald.conf" plus the "systemd/journald.conf.d" drop-in directories, with
 * config_item_perf_lookup and journald_gperf_lookup as lookup function and table.
 * NOTE(review): the remaining arguments of the call are not visible in this view. */
1394 static int server_parse_config_file(Server
*s
) {
1397 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1398 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1400 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback that server_schedule_sync() installs through sd_event_add_time(); userdata is the
 * Server. NOTE(review): the body after the cast of userdata is not visible in this view. */
1404 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1405 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk after a message of the given syslog priority.
 *
 * Priorities up to and including LOG_CRIT take the immediate path (see the comment below). Otherwise,
 * if no sync is scheduled yet and s->sync_interval_usec is non-zero, a timer on s->event is armed
 * for "now + sync_interval_usec" on CLOCK_MONOTONIC with server_dispatch_sync() as callback, and
 * s->sync_scheduled is set.
 *
 * NOTE(review): declarations, the early returns, the checks of r and the branch structure around
 * the timer setup are only partially visible in this view. */
1413 int server_schedule_sync(Server
*s
, int priority
) {
1418 if (priority
<= LOG_CRIT
) {
1419 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1424 if (s
->sync_scheduled
)
1427 if (s
->sync_interval_usec
> 0) {
/* Base the deadline on the event loop timestamp rather than on a fresh clock read. */
1430 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1434 when
+= s
->sync_interval_usec
;
/* The timer source is created on first use and kept in s->sync_event_source afterwards. */
1436 if (!s
->sync_event_source
) {
1437 r
= sd_event_add_time(
1439 &s
->sync_event_source
,
1442 server_dispatch_sync
, s
);
1446 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1448 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1452 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1457 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the hostname fd; userdata is the Server.
 * Refreshes the cached hostname through server_cache_hostname(). */
1463 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1464 Server
*s
= userdata
;
1468 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking, no controlling tty) into
 * s->hostname_fd and registers it on s->event with an event mask of 0 and dispatch_hostname_change()
 * as callback, so that the cached hostname can follow changes.
 *
 * A failing open() is logged and returned as an error. For a failing registration two outcomes are
 * visible: one path only warns and closes the fd again, the other logs and returns the error; the
 * condition selecting between them is not visible in this view. On success the source priority is
 * set to SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * NOTE(review): in this view the comment opened further down has lost its closing line. */
1472 static int server_open_hostname(Server
*s
) {
1477 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1478 if (s
->hostname_fd
< 0)
1479 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1481 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1483 /* kernels prior to 3.2 don't support polling this file. Ignore
1486 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1487 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1491 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1494 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1496 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (registered for EPOLLOUT by server_connect_notify());
 * userdata is the Server. Each invocation handles at most one pending item, in this order:
 *   1. the initial ready message, if s->sent_notify_ready is still false,
 *   2. a watchdog message, if s->send_watchdog is set,
 *   3. one queued stdout stream notification.
 * Messages are sent with MSG_DONTWAIT. When neither a watchdog message nor stream notifications
 * remain pending, the event source disables itself.
 *
 * NOTE(review): declarations, the checks on the send() result, part of the message literals and
 * the return statements are not visible in this view. */
1501 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1502 Server
*s
= userdata
;
1506 assert(s
->notify_event_source
== es
);
1507 assert(s
->notify_fd
== fd
);
1509 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1510 * message on it. Either it's the watchdog event, the initial
1511 * READY=1 event or an stdout stream event. If there's nothing
1512 * to write anymore, turn our event source off. The next time
1513 * there's something to send it will be turned on again. */
1515 if (!s
->sent_notify_ready
) {
1516 static const char p
[] =
1518 "STATUS=Processing requests...";
1521 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
/* EAGAIN is tested separately from the other errors, which are logged and returned. */
1523 if (errno
== EAGAIN
)
1526 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1529 s
->sent_notify_ready
= true;
1530 log_debug("Sent READY=1 notification.");
1532 } else if (s
->send_watchdog
) {
1534 static const char p
[] =
1539 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1541 if (errno
== EAGAIN
)
1544 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
/* Clear the request flag once the message went out; dispatch_watchdog() sets it again. */
1547 s
->send_watchdog
= false;
1548 log_debug("Sent WATCHDOG=1 notification.");
1550 } else if (s
->stdout_streams_notify_queue
)
1551 /* Dispatch one stream notification event */
1552 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1554 /* Leave us enabled if there's still more to do. */
1555 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1558 /* There was nothing to do anymore, let's turn ourselves off. */
1559 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1561 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of the watchdog event source (added in server_connect_notify()); userdata is the
 * Server. Requests a watchdog message by setting s->send_watchdog and switching the notify event
 * source on, then re-arms its own timer for usec + watchdog_usec/2 and enables it again.
 * A failure to switch on the notify source is only warned about; failures to re-arm or enable the
 * watchdog timer are logged and returned.
 * NOTE(review): the checks of r and the final return are not visible in this view. */
1566 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1567 Server
*s
= userdata
;
1572 s
->send_watchdog
= true;
/* The message itself is sent by dispatch_notify_event(), which tests s->send_watchdog. */
1574 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1576 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1578 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1580 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1582 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1584 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects the server to the service manager notification socket without ever blocking on it.
 *
 * Steps visible below:
 *   - asserts that no notify fd and no notify event source exist yet;
 *   - reads $NOTIFY_SOCKET and rejects values that do not start with '@' or '/' or that consist
 *     of that single character only, as well as values longer than sun_path;
 *   - creates a non-blocking, close-on-exec AF_UNIX datagram socket in s->notify_fd and tries to
 *     enlarge its send buffer to NOTIFY_SNDBUF_SIZE (result ignored);
 *   - copies the path into sun_path, replacing a leading '@' by a zero byte, and connects;
 *   - watches the socket for EPOLLOUT with dispatch_notify_event() as callback;
 *   - if sd_watchdog_enabled() reports a watchdog, sets s->send_watchdog and adds a
 *     CLOCK_MONOTONIC timer at now + watchdog_usec/2 (accuracy watchdog_usec/4) that runs
 *     dispatch_watchdog().
 * Socket, connect and event source failures are logged and returned as errors.
 *
 * NOTE(review): in this view the delimiters of the long explanatory comment, several checks and
 * return statements, and the end of the function are not visible. */
1589 static int server_connect_notify(Server
*s
) {
1590 union sockaddr_union sa
= {
1591 .un
.sun_family
= AF_UNIX
,
1597 assert(s
->notify_fd
< 0);
1598 assert(!s
->notify_event_source
);
1601 So here's the problem: we'd like to send notification
1602 messages to PID 1, but we cannot do that via sd_notify(),
1603 since that's synchronous, and we might end up blocking on
1604 it. Specifically: given that PID 1 might block on
1605 dbus-daemon during IPC, and dbus-daemon is logging to us,
1606 and might hence block on us, we might end up in a deadlock
1607 if we block on sending PID 1 notification messages — by
1608 generating a full blocking circle. To avoid this, let's
1609 create a non-blocking socket, and connect it to the
1610 notification socket, and then wait for POLLOUT before we
1611 send anything. This should efficiently avoid any deadlocks,
1612 as we'll never block on PID 1, hence PID 1 can safely block
1613 on dbus-daemon which can safely block on us again.
1615 Don't think that this issue is real? It is, see:
1616 https://github.com/systemd/systemd/issues/1505
1619 e
= getenv("NOTIFY_SOCKET");
1623 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1624 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1628 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1629 log_error("NOTIFY_SOCKET path too long: %s", e
);
1633 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1634 if (s
->notify_fd
< 0)
1635 return log_error_errno(errno
, "Failed to create notify socket: %m");
1637 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1639 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1640 if (sa
.un
.sun_path
[0] == '@')
1641 sa
.un
.sun_path
[0] = 0;
1643 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1645 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1647 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1649 return log_error_errno(r
, "Failed to watch notification socket: %m");
1651 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1652 s
->send_watchdog
= true;
1654 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1656 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1659 /* This should fire pretty soon, which we'll use to send the
/* Initializes the Server object s and brings up everything it needs to run.
 *
 * Phases visible below:
 *   1. set all fds to -1 and fill in the built-in defaults;
 *   2. apply the configuration file and the kernel command line on top of the defaults;
 *   3. create the runtime directory, the user journal map, the mmap cache, the deferred-close set
 *      and the default event loop;
 *   4. sort the file descriptors received through sd_listen_fds() into native, stdout, syslog and
 *      audit sockets, collecting the rest in fds for server_restore_streams();
 *   5. open the sockets and devices that were not passed in, set up signals, udev, rate limiting,
 *      cgroup root and the cached host, boot and machine ids;
 *   6. compute the storage paths, connect the notification socket, acquire the default client
 *      context and return the result of system_journal_open().
 *
 * NOTE(review): declarations, most checks of r with their error returns, and the bodies of the
 * per-socket branches are not visible in this view. */
1665 int server_init(Server
*s
) {
1666 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* -1 marks every descriptor as not open. */
1673 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
/* Built-in defaults; the configuration parsing below may override them. */
1676 s
->read_kmsg
= true;
1678 s
->watchdog_usec
= USEC_INFINITY
;
1680 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1681 s
->sync_scheduled
= false;
1683 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1684 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1686 s
->forward_to_wall
= true;
1688 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1690 s
->max_level_store
= LOG_DEBUG
;
1691 s
->max_level_syslog
= LOG_DEBUG
;
1692 s
->max_level_kmsg
= LOG_NOTICE
;
1693 s
->max_level_console
= LOG_INFO
;
1694 s
->max_level_wall
= LOG_EMERG
;
1696 s
->line_max
= DEFAULT_LINE_MAX
;
1698 journal_reset_metrics(&s
->system_storage
.metrics
);
1699 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* The return value of the configuration file parser is not used here. */
1701 server_parse_config_file(s
);
1703 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1705 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* Exactly one of interval and burst is zero: reset both to zero. */
1707 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1708 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1709 s
->rate_limit_interval
, s
->rate_limit_burst
);
1710 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1713 (void) mkdir_p("/run/systemd/journal", 0755);
1715 s
->user_journals
= ordered_hashmap_new(NULL
);
1716 if (!s
->user_journals
)
1719 s
->mmap
= mmap_cache_new();
1723 s
->deferred_closes
= set_new(NULL
);
1724 if (!s
->deferred_closes
)
1727 r
= sd_event_default(&s
->event
);
1729 return log_error_errno(r
, "Failed to create event loop: %m");
/* Number of descriptors handed over by the service manager. */
1731 n
= sd_listen_fds(true);
1733 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed descriptor by socket type and path; every kind is accepted only once. */
1735 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1737 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1739 if (s
->native_fd
>= 0) {
1740 log_error("Too many native sockets passed.");
1746 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1748 if (s
->stdout_fd
>= 0) {
1749 log_error("Too many stdout sockets passed.");
1755 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1756 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1758 if (s
->syslog_fd
>= 0) {
1759 log_error("Too many /dev/log sockets passed.");
1765 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1767 if (s
->audit_fd
>= 0) {
1768 log_error("Too many audit sockets passed.");
1782 r
= fdset_put(fds
, fd
);
1788 /* Try to restore streams, but don't bother if this fails */
1789 (void) server_restore_streams(s
, fds
);
/* Whatever is still left in fds is reported and freed. */
1791 if (fdset_size(fds
) > 0) {
1792 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1793 fds
= fdset_free(fds
);
/* True when none of the four socket kinds was passed in. */
1796 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1798 /* always open stdout, syslog, native, and kmsg sockets */
1800 /* systemd-journald.socket: /run/systemd/journal/stdout */
1801 r
= server_open_stdout_socket(s
);
1805 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1806 r
= server_open_syslog_socket(s
);
1810 /* systemd-journald.socket: /run/systemd/journal/socket */
1811 r
= server_open_native_socket(s
);
1816 r
= server_open_dev_kmsg(s
);
1820 /* Unless we got *some* sockets and not audit, open audit socket */
1821 if (s
->audit_fd
>= 0 || no_sockets
) {
1822 r
= server_open_audit(s
);
1827 r
= server_open_kernel_seqnum(s
);
1831 r
= server_open_hostname(s
);
1835 r
= setup_signals(s
);
1839 s
->udev
= udev_new();
1843 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1847 r
= cg_get_root_path(&s
->cgroup_root
);
1851 server_cache_hostname(s
);
1852 server_cache_boot_id(s
);
1853 server_cache_machine_id(s
);
1855 s
->runtime_storage
.name
= "Runtime journal";
1856 s
->system_storage
.name
= "System journal";
/* Both paths are heap strings built with strjoin(); server_done() frees them. */
1858 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1859 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1860 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
/* Failures of the next two calls are deliberately ignored (explicit void casts). */
1863 (void) server_connect_notify(s
);
1865 (void) client_context_acquire_default(s
);
1867 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME timestamp on the system
 * journal, if one is open, and on every file in s->user_journals.
 * NOTE(review): the declarations of n, f and i and any surrounding conditional compilation are
 * not visible in this view. */
1870 void server_maybe_append_tags(Server
*s
) {
/* One timestamp is taken up front and shared by all files. */
1876 n
= now(CLOCK_REALTIME
);
1878 if (s
->system_journal
)
1879 journal_file_maybe_append_tag(s
->system_journal
, n
);
1881 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1882 journal_file_maybe_append_tag(f
, n
);
/* Releases everything owned by the Server object s.
 *
 * Order visible below: deferred journal closes, stdout streams, client contexts, the system,
 * runtime and user journal files, all event sources followed by the event loop itself, all file
 * descriptors, the rate limiter, the kernel sequence number mapping, heap strings, the mmap cache
 * and the udev context.
 * NOTE(review): the declaration of f and a few statements between the visible lines are not shown
 * in this view. */
1886 void server_done(Server
*s
) {
1890 if (s
->deferred_closes
) {
1891 journal_file_close_set(s
->deferred_closes
);
1892 set_free(s
->deferred_closes
);
/* Loops until s->stdout_streams is empty; presumably stdout_stream_free() unlinks the
 * stream it frees from that list (TODO confirm). */
1895 while (s
->stdout_streams
)
1896 stdout_stream_free(s
->stdout_streams
);
1898 client_context_flush_all(s
);
1900 if (s
->system_journal
)
1901 (void) journal_file_close(s
->system_journal
);
1903 if (s
->runtime_journal
)
1904 (void) journal_file_close(s
->runtime_journal
);
/* Drain the map entry by entry so that each user journal file is closed before the map is freed. */
1906 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1907 (void) journal_file_close(f
);
1909 ordered_hashmap_free(s
->user_journals
);
/* Drop the event sources first, then the event loop they belong to. */
1911 sd_event_source_unref(s
->syslog_event_source
);
1912 sd_event_source_unref(s
->native_event_source
);
1913 sd_event_source_unref(s
->stdout_event_source
);
1914 sd_event_source_unref(s
->dev_kmsg_event_source
);
1915 sd_event_source_unref(s
->audit_event_source
);
1916 sd_event_source_unref(s
->sync_event_source
);
1917 sd_event_source_unref(s
->sigusr1_event_source
);
1918 sd_event_source_unref(s
->sigusr2_event_source
);
1919 sd_event_source_unref(s
->sigterm_event_source
);
1920 sd_event_source_unref(s
->sigint_event_source
);
1921 sd_event_source_unref(s
->sigrtmin1_event_source
);
1922 sd_event_source_unref(s
->hostname_event_source
);
1923 sd_event_source_unref(s
->notify_event_source
);
1924 sd_event_source_unref(s
->watchdog_event_source
);
1925 sd_event_unref(s
->event
);
1927 safe_close(s
->syslog_fd
);
1928 safe_close(s
->native_fd
);
1929 safe_close(s
->stdout_fd
);
1930 safe_close(s
->dev_kmsg_fd
);
1931 safe_close(s
->audit_fd
);
1932 safe_close(s
->hostname_fd
);
1933 safe_close(s
->notify_fd
);
1936 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with a length of one uint64_t. */
1938 if (s
->kernel_seqnum
)
1939 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1943 free(s
->cgroup_root
);
1944 free(s
->hostname_field
);
1945 free(s
->runtime_storage
.path
);
1946 free(s
->system_storage
.path
);
1949 mmap_cache_unref(s
->mmap
);
1951 udev_unref(s
->udev
);
/* Names of the Storage enum values, followed by the macro invocations that generate the
 * string lookup helpers and the config_parse_storage() parser for it.
 * NOTE(review): the closing line of the table initializer is not visible in this view. */
1954 static const char* const storage_table
[_STORAGE_MAX
] = {
1955 [STORAGE_AUTO
] = "auto",
1956 [STORAGE_VOLATILE
] = "volatile",
1957 [STORAGE_PERSISTENT
] = "persistent",
1958 [STORAGE_NONE
] = "none"
1961 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1962 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Names of the SplitMode enum values, followed by the macro invocations that generate the
 * string lookup helpers and the config_parse_split_mode() parser for it.
 * NOTE(review): the closing line of the table initializer is not visible in this view. */
1964 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1965 [SPLIT_LOGIN
] = "login",
1966 [SPLIT_UID
] = "uid",
1967 [SPLIT_NONE
] = "none",
1970 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1971 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
1973 int config_parse_line_max(
1975 const char *filename
,
1977 const char *section
,
1978 unsigned section_line
,
1993 if (isempty(rvalue
))
1994 /* Empty assignment means default */
1995 *sz
= DEFAULT_LINE_MAX
;
1999 r
= parse_size(rvalue
, 1024, &v
);
2001 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2006 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2007 * terminal size is 80ch, and it might make sense to break one character before the natural
2008 * line break would occur on that. */
2009 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2011 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2012 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2013 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2014 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2015 * fail much earlier anyway. */
2016 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);