1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
74 #define USER_JOURNALS_MAX 1024
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85 /* The period to insert between posting changes for coalescing */
86 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88 static int determine_space_for(
90 JournalMetrics
*metrics
,
98 uint64_t sum
= 0, ss_avail
, avail
;
99 _cleanup_closedir_
DIR *d
= NULL
;
110 ts
= now(CLOCK_MONOTONIC
);
112 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
115 *available
= s
->cached_space_available
;
117 *limit
= s
->cached_space_limit
;
122 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
125 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
127 if (fstatvfs(dirfd(d
), &ss
) < 0)
128 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
130 FOREACH_DIRENT_ALL(de
, d
, break) {
133 if (!endswith(de
->d_name
, ".journal") &&
134 !endswith(de
->d_name
, ".journal~"))
137 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
138 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
142 if (!S_ISREG(st
.st_mode
))
145 sum
+= (uint64_t) st
.st_blocks
* 512UL;
148 /* If requested, then let's bump the min_use limit to the
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
156 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
158 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
159 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
161 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
162 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
163 s
->cached_space_timestamp
= ts
;
166 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
167 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
169 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
170 "%s (%s) is currently using %s.\n"
171 "Maximum allowed usage is set to %s.\n"
172 "Leaving at least %s free (of currently available %s of space).\n"
173 "Enforced usage limit is thus %s, of which %s are still available.",
175 format_bytes(fb1
, sizeof(fb1
), sum
),
176 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
),
177 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
),
178 format_bytes(fb4
, sizeof(fb4
), ss_avail
),
179 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
),
180 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
));
184 *available
= s
->cached_space_available
;
186 *limit
= s
->cached_space_limit
;
191 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
192 JournalMetrics
*metrics
;
193 const char *path
, *name
;
197 if (s
->system_journal
) {
198 path
= "/var/log/journal/";
199 metrics
= &s
->system_metrics
;
200 name
= "System journal";
202 path
= "/run/log/journal/";
203 metrics
= &s
->runtime_metrics
;
204 name
= "Runtime journal";
207 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
210 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
217 if (uid
<= SYSTEM_UID_MAX
)
220 r
= add_acls_for_user(f
->fd
, uid
);
222 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
226 static int open_journal(
232 JournalMetrics
*metrics
,
233 JournalFile
*template,
242 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, template, ret
);
244 r
= journal_file_open(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, template, ret
);
249 r
= journal_file_enable_post_change_timer(*ret
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
251 *ret
= journal_file_close(*ret
);
258 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
259 _cleanup_free_
char *p
= NULL
;
266 /* We split up user logs only on /var, not on /run. If the
267 * runtime file is open, we write to it exclusively, in order
268 * to guarantee proper order as soon as we flush /run to
269 * /var and close the runtime file. */
271 if (s
->runtime_journal
)
272 return s
->runtime_journal
;
274 if (uid
<= SYSTEM_UID_MAX
)
275 return s
->system_journal
;
277 r
= sd_id128_get_machine(&machine
);
279 return s
->system_journal
;
281 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
285 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
286 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
287 return s
->system_journal
;
289 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
290 /* Too many open? Then let's close one */
291 f
= ordered_hashmap_steal_first(s
->user_journals
);
293 journal_file_close(f
);
296 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, NULL
, &f
);
298 return s
->system_journal
;
300 server_add_acls(f
, uid
);
302 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
304 journal_file_close(f
);
305 return s
->system_journal
;
311 static int do_rotate(
324 r
= journal_file_rotate(f
, s
->compress
, seal
);
327 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
329 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
331 server_add_acls(*f
, uid
);
336 void server_rotate(Server
*s
) {
342 log_debug("Rotating...");
344 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
345 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
347 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
348 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
350 ordered_hashmap_replace(s
->user_journals
, k
, f
);
352 /* Old file has been closed and deallocated */
353 ordered_hashmap_remove(s
->user_journals
, k
);
357 void server_sync(Server
*s
) {
362 if (s
->system_journal
) {
363 r
= journal_file_set_offline(s
->system_journal
);
365 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
368 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
369 r
= journal_file_set_offline(f
);
371 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
374 if (s
->sync_event_source
) {
375 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
377 log_error_errno(r
, "Failed to disable sync timer source: %m");
380 s
->sync_scheduled
= false;
383 static void do_vacuum(
386 JournalMetrics
*metrics
,
390 bool patch_min_use
) {
404 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
406 limit
= metrics
->max_use
;
407 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
409 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
410 if (r
< 0 && r
!= -ENOENT
)
411 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
414 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
417 log_debug("Vacuuming...");
419 s
->oldest_file_usec
= 0;
421 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
422 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
424 s
->cached_space_limit
= 0;
425 s
->cached_space_available
= 0;
426 s
->cached_space_timestamp
= 0;
431 static void server_cache_machine_id(Server
*s
) {
437 r
= sd_id128_get_machine(&id
);
441 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
444 static void server_cache_boot_id(Server
*s
) {
450 r
= sd_id128_get_boot(&id
);
454 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
457 static void server_cache_hostname(Server
*s
) {
458 _cleanup_free_
char *t
= NULL
;
463 t
= gethostname_malloc();
467 x
= strappend("_HOSTNAME=", t
);
471 free(s
->hostname_field
);
472 s
->hostname_field
= x
;
475 static bool shall_try_append_again(JournalFile
*f
, int r
) {
477 /* -E2BIG Hit configured limit
479 -EDQUOT Quota limit hit
481 -EIO I/O error of some kind (mmap)
482 -EHOSTDOWN Other machine
483 -EBUSY Unclean shutdown
484 -EPROTONOSUPPORT Unsupported feature
487 -ESHUTDOWN Already archived
488 -EIDRM Journal file has been deleted */
490 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
491 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
492 else if (r
== -EHOSTDOWN
)
493 log_info("%s: Journal file from other machine, rotating.", f
->path
);
494 else if (r
== -EBUSY
)
495 log_info("%s: Unclean shutdown, rotating.", f
->path
);
496 else if (r
== -EPROTONOSUPPORT
)
497 log_info("%s: Unsupported feature, rotating.", f
->path
);
498 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
499 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
501 log_warning("%s: IO error, rotating.", f
->path
);
502 else if (r
== -EIDRM
)
503 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
510 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
512 bool vacuumed
= false;
519 f
= find_journal(s
, uid
);
523 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
524 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
526 server_vacuum(s
, false, false);
529 f
= find_journal(s
, uid
);
534 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
536 server_schedule_sync(s
, priority
);
540 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
541 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
546 server_vacuum(s
, false, false);
548 f
= find_journal(s
, uid
);
552 log_debug("Retrying write.");
553 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
555 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
557 server_schedule_sync(s
, priority
);
560 static void dispatch_message_real(
562 struct iovec
*iovec
, unsigned n
, unsigned m
,
563 const struct ucred
*ucred
,
564 const struct timeval
*tv
,
565 const char *label
, size_t label_len
,
570 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
571 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
572 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
573 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
574 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
575 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
576 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
577 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
583 uid_t realuid
= 0, owner
= 0, journal_uid
;
584 bool owner_valid
= false;
586 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
587 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
588 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
589 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
598 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
601 realuid
= ucred
->uid
;
603 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
604 IOVEC_SET_STRING(iovec
[n
++], pid
);
606 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
607 IOVEC_SET_STRING(iovec
[n
++], uid
);
609 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
610 IOVEC_SET_STRING(iovec
[n
++], gid
);
612 r
= get_process_comm(ucred
->pid
, &t
);
614 x
= strjoina("_COMM=", t
);
616 IOVEC_SET_STRING(iovec
[n
++], x
);
619 r
= get_process_exe(ucred
->pid
, &t
);
621 x
= strjoina("_EXE=", t
);
623 IOVEC_SET_STRING(iovec
[n
++], x
);
626 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
628 x
= strjoina("_CMDLINE=", t
);
630 IOVEC_SET_STRING(iovec
[n
++], x
);
633 r
= get_process_capeff(ucred
->pid
, &t
);
635 x
= strjoina("_CAP_EFFECTIVE=", t
);
637 IOVEC_SET_STRING(iovec
[n
++], x
);
641 r
= audit_session_from_pid(ucred
->pid
, &audit
);
643 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
644 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
647 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
649 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
650 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
654 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
656 char *session
= NULL
;
658 x
= strjoina("_SYSTEMD_CGROUP=", c
);
659 IOVEC_SET_STRING(iovec
[n
++], x
);
661 r
= cg_path_get_session(c
, &t
);
663 session
= strjoina("_SYSTEMD_SESSION=", t
);
665 IOVEC_SET_STRING(iovec
[n
++], session
);
668 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
671 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
672 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
675 if (cg_path_get_unit(c
, &t
) >= 0) {
676 x
= strjoina("_SYSTEMD_UNIT=", t
);
678 IOVEC_SET_STRING(iovec
[n
++], x
);
679 } else if (unit_id
&& !session
) {
680 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
681 IOVEC_SET_STRING(iovec
[n
++], x
);
684 if (cg_path_get_user_unit(c
, &t
) >= 0) {
685 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
687 IOVEC_SET_STRING(iovec
[n
++], x
);
688 } else if (unit_id
&& session
) {
689 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
690 IOVEC_SET_STRING(iovec
[n
++], x
);
693 if (cg_path_get_slice(c
, &t
) >= 0) {
694 x
= strjoina("_SYSTEMD_SLICE=", t
);
696 IOVEC_SET_STRING(iovec
[n
++], x
);
700 } else if (unit_id
) {
701 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
702 IOVEC_SET_STRING(iovec
[n
++], x
);
706 if (mac_selinux_have()) {
708 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
710 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
711 IOVEC_SET_STRING(iovec
[n
++], x
);
713 security_context_t con
;
715 if (getpidcon(ucred
->pid
, &con
) >= 0) {
716 x
= strjoina("_SELINUX_CONTEXT=", con
);
719 IOVEC_SET_STRING(iovec
[n
++], x
);
728 r
= get_process_uid(object_pid
, &object_uid
);
730 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
731 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
734 r
= get_process_gid(object_pid
, &object_gid
);
736 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
737 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
740 r
= get_process_comm(object_pid
, &t
);
742 x
= strjoina("OBJECT_COMM=", t
);
744 IOVEC_SET_STRING(iovec
[n
++], x
);
747 r
= get_process_exe(object_pid
, &t
);
749 x
= strjoina("OBJECT_EXE=", t
);
751 IOVEC_SET_STRING(iovec
[n
++], x
);
754 r
= get_process_cmdline(object_pid
, 0, false, &t
);
756 x
= strjoina("OBJECT_CMDLINE=", t
);
758 IOVEC_SET_STRING(iovec
[n
++], x
);
762 r
= audit_session_from_pid(object_pid
, &audit
);
764 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
765 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
768 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
770 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
771 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
775 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
777 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
778 IOVEC_SET_STRING(iovec
[n
++], x
);
780 r
= cg_path_get_session(c
, &t
);
782 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
784 IOVEC_SET_STRING(iovec
[n
++], x
);
787 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
788 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
789 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
792 if (cg_path_get_unit(c
, &t
) >= 0) {
793 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
795 IOVEC_SET_STRING(iovec
[n
++], x
);
798 if (cg_path_get_user_unit(c
, &t
) >= 0) {
799 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
801 IOVEC_SET_STRING(iovec
[n
++], x
);
810 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
811 IOVEC_SET_STRING(iovec
[n
++], source_time
);
814 /* Note that strictly speaking storing the boot id here is
815 * redundant since the entry includes this in-line
816 * anyway. However, we need this indexed, too. */
817 if (!isempty(s
->boot_id_field
))
818 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
820 if (!isempty(s
->machine_id_field
))
821 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
823 if (!isempty(s
->hostname_field
))
824 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
828 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
829 /* Split up strictly by any UID */
830 journal_uid
= realuid
;
831 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
832 /* Split up by login UIDs. We do this only if the
833 * realuid is not root, in order not to accidentally
834 * leak privileged information to the user that is
835 * logged by a privileged process that is part of an
836 * unprivileged session. */
841 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
844 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
845 char mid
[11 + 32 + 1];
846 char buffer
[16 + LINE_MAX
+ 1];
847 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 6];
850 struct ucred ucred
= {};
855 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
856 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
858 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
859 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
861 memcpy(buffer
, "MESSAGE=", 8);
862 va_start(ap
, format
);
863 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
865 IOVEC_SET_STRING(iovec
[n
++], buffer
);
867 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
868 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
869 IOVEC_SET_STRING(iovec
[n
++], mid
);
872 ucred
.pid
= getpid();
873 ucred
.uid
= getuid();
874 ucred
.gid
= getgid();
876 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
879 void server_dispatch_message(
881 struct iovec
*iovec
, unsigned n
, unsigned m
,
882 const struct ucred
*ucred
,
883 const struct timeval
*tv
,
884 const char *label
, size_t label_len
,
890 _cleanup_free_
char *path
= NULL
;
891 uint64_t available
= 0;
895 assert(iovec
|| n
== 0);
900 if (LOG_PRI(priority
) > s
->max_level_store
)
903 /* Stop early in case the information will not be stored
905 if (s
->storage
== STORAGE_NONE
)
911 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
915 /* example: /user/lennart/3/foobar
916 * /system/dbus.service/foobar
918 * So let's cut off everything past the third /, since that is
919 * where user directories start */
921 c
= strchr(path
, '/');
923 c
= strchr(c
+1, '/');
925 c
= strchr(c
+1, '/');
931 (void) determine_space(s
, false, false, &available
, NULL
);
932 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
936 /* Write a suppression message if we suppressed something */
938 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
939 "Suppressed %u messages from %s", rl
- 1, path
);
942 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
946 static int system_journal_open(Server
*s
, bool flush_requested
) {
950 if (!s
->system_journal
&&
951 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
953 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
955 /* If in auto mode: first try to create the machine
956 * path, but not the prefix.
958 * If in persistent mode: create /var/log/journal and
959 * the machine path */
961 if (s
->storage
== STORAGE_PERSISTENT
)
962 (void) mkdir_p("/var/log/journal/", 0755);
964 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
965 (void) mkdir(fn
, 0755);
967 fn
= strjoina(fn
, "/system.journal");
968 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, NULL
, &s
->system_journal
);
970 server_add_acls(s
->system_journal
, 0);
971 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
973 if (r
!= -ENOENT
&& r
!= -EROFS
)
974 log_warning_errno(r
, "Failed to open system journal: %m");
980 if (!s
->runtime_journal
&&
981 (s
->storage
!= STORAGE_NONE
)) {
983 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
985 if (s
->system_journal
) {
987 /* Try to open the runtime journal, but only
988 * if it already exists, so that we can flush
989 * it into the system journal */
991 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_metrics
, NULL
, &s
->runtime_journal
);
994 log_warning_errno(r
, "Failed to open runtime journal: %m");
1001 /* OK, we really need the runtime journal, so create
1002 * it if necessary. */
1004 (void) mkdir("/run/log", 0755);
1005 (void) mkdir("/run/log/journal", 0755);
1006 (void) mkdir_parents(fn
, 0750);
1008 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_metrics
, NULL
, &s
->runtime_journal
);
1010 return log_error_errno(r
, "Failed to open runtime journal: %m");
1013 if (s
->runtime_journal
) {
1014 server_add_acls(s
->runtime_journal
, 0);
1015 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1022 int server_flush_to_var(Server
*s
) {
1024 sd_journal
*j
= NULL
;
1025 char ts
[FORMAT_TIMESPAN_MAX
];
1032 if (s
->storage
!= STORAGE_AUTO
&&
1033 s
->storage
!= STORAGE_PERSISTENT
)
1036 if (!s
->runtime_journal
)
1039 (void) system_journal_open(s
, true);
1041 if (!s
->system_journal
)
1044 log_debug("Flushing to /var...");
1046 start
= now(CLOCK_MONOTONIC
);
1048 r
= sd_id128_get_machine(&machine
);
1052 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1054 return log_error_errno(r
, "Failed to read runtime journal: %m");
1056 sd_journal_set_data_threshold(j
, 0);
1058 SD_JOURNAL_FOREACH(j
) {
1062 f
= j
->current_file
;
1063 assert(f
&& f
->current_offset
> 0);
1067 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1069 log_error_errno(r
, "Can't read entry: %m");
1073 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1077 if (!shall_try_append_again(s
->system_journal
, r
)) {
1078 log_error_errno(r
, "Can't write entry: %m");
1083 server_vacuum(s
, false, false);
1085 if (!s
->system_journal
) {
1086 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1091 log_debug("Retrying write.");
1092 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1094 log_error_errno(r
, "Can't write entry: %m");
1102 journal_file_post_change(s
->system_journal
);
1104 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1107 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1109 sd_journal_close(j
);
1111 server_driver_message(s
, SD_ID128_NULL
, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0), n
);
1116 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1117 Server
*s
= userdata
;
1118 struct ucred
*ucred
= NULL
;
1119 struct timeval
*tv
= NULL
;
1120 struct cmsghdr
*cmsg
;
1122 size_t label_len
= 0, m
;
1125 int *fds
= NULL
, v
= 0;
1129 struct cmsghdr cmsghdr
;
1131 /* We use NAME_MAX space for the SELinux label
1132 * here. The kernel currently enforces no
1133 * limit, but according to suggestions from
1134 * the SELinux people this will change and it
1135 * will probably be identical to NAME_MAX. For
1136 * now we use that, but this should be updated
1137 * one day when the final limit is known. */
1138 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1139 CMSG_SPACE(sizeof(struct timeval
)) +
1140 CMSG_SPACE(sizeof(int)) + /* fd */
1141 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1144 union sockaddr_union sa
= {};
1146 struct msghdr msghdr
= {
1149 .msg_control
= &control
,
1150 .msg_controllen
= sizeof(control
),
1152 .msg_namelen
= sizeof(sa
),
1156 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1158 if (revents
!= EPOLLIN
) {
1159 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1163 /* Try to get the right size, if we can. (Not all
1164 * sockets support SIOCINQ, hence we just try, but
1165 * don't rely on it.) */
1166 (void) ioctl(fd
, SIOCINQ
, &v
);
1168 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1169 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1171 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1173 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1176 iovec
.iov_base
= s
->buffer
;
1177 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1179 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1181 if (errno
== EINTR
|| errno
== EAGAIN
)
1184 return log_error_errno(errno
, "recvmsg() failed: %m");
1187 CMSG_FOREACH(cmsg
, &msghdr
) {
1189 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1190 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1191 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1192 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1193 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1194 cmsg
->cmsg_type
== SCM_SECURITY
) {
1195 label
= (char*) CMSG_DATA(cmsg
);
1196 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1197 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1198 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1199 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1200 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1201 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1202 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1203 fds
= (int*) CMSG_DATA(cmsg
);
1204 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1208 /* And a trailing NUL, just in case */
1211 if (fd
== s
->syslog_fd
) {
1212 if (n
> 0 && n_fds
== 0)
1213 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1215 log_warning("Got file descriptors via syslog socket. Ignoring.");
1217 } else if (fd
== s
->native_fd
) {
1218 if (n
> 0 && n_fds
== 0)
1219 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1220 else if (n
== 0 && n_fds
== 1)
1221 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1223 log_warning("Got too many file descriptors via native socket. Ignoring.");
1226 assert(fd
== s
->audit_fd
);
1228 if (n
> 0 && n_fds
== 0)
1229 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1231 log_warning("Got file descriptors via audit socket. Ignoring.");
1234 close_many(fds
, n_fds
);
1238 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1239 Server
*s
= userdata
;
1244 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1246 server_flush_to_var(s
);
1248 server_vacuum(s
, false, false);
1250 r
= touch("/run/systemd/journal/flushed");
1252 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1257 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1258 Server
*s
= userdata
;
1263 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1265 server_vacuum(s
, true, true);
1267 /* Let clients know when the most recent rotation happened. */
1268 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1270 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1275 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1276 Server
*s
= userdata
;
1280 log_received_signal(LOG_INFO
, si
);
1282 sd_event_exit(s
->event
, 0);
1286 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1287 Server
*s
= userdata
;
1292 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1296 /* Let clients know when the most recent sync happened. */
1297 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1299 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1304 static int setup_signals(Server
*s
) {
1309 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1311 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1315 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1319 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1323 /* Let's process SIGTERM late, so that we flush all queued
1324 * messages to disk before we exit */
1325 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1329 /* When journald is invoked on the terminal (when debugging),
1330 * it's useful if C-c is handled equivalent to SIGTERM. */
1331 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1335 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1339 /* SIGRTMIN+1 causes an immediate sync. We process this very
1340 * late, so that everything else queued at this point is
1341 * really written to disk. Clients can watch
1342 * /run/systemd/journal/synced with inotify until its mtime
1343 * changes to see when a sync happened. */
1344 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1348 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald switches found on the kernel command line to the Server.
 *
 * The command line is read with proc_cmdline() and split into words with
 * extract_first_word(). Words of the form
 *     systemd.journald.forward_to_{syslog,kmsg,console,wall}=BOOL
 * are handed to parse_boolean() and the result is stored in the matching
 * s->forward_to_* field. A value that does not parse, and any other word
 * that starts with systemd.journald, only produces a warning.
 *
 * Failing to read /proc/cmdline is only warned about, while a failure of
 * extract_first_word() is logged as an error and returned. */
1355 static int server_parse_proc_cmdline(Server
*s
) {
1356 _cleanup_free_
char *line
= NULL
;
1360 r
= proc_cmdline(&line
);
1362 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1368 _cleanup_free_
char *word
= NULL
;
1370 r
= extract_first_word(&p
, &word
, NULL
, 0);
1372 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1377 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
/* 35 is the length of the prefix matched above, so word + 35 is the value part */
1378 r
= parse_boolean(word
+ 35);
1380 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1382 s
->forward_to_syslog
= r
;
1383 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
/* 33 is the length of the prefix matched above */
1384 r
= parse_boolean(word
+ 33);
1386 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1388 s
->forward_to_kmsg
= r
;
1389 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
/* 36 is the length of the prefix matched above */
1390 r
= parse_boolean(word
+ 36);
1392 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1394 s
->forward_to_console
= r
;
1395 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
/* 33 is the length of the prefix matched above */
1396 r
= parse_boolean(word
+ 33);
1398 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1400 s
->forward_to_wall
= r
;
/* Catch-all for anything else in the systemd.journald namespace */
1401 } else if (startswith(word
, "systemd.journald"))
1402 log_warning("Invalid systemd.journald parameter. Ignoring.");
1405 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server.
 *
 * Delegates to config_parse_many(), passing PKGSYSCONFDIR/journald.conf as
 * the main file and the systemd/journald.conf.d directories (expanded by
 * CONF_PATHS_NULSTR) as further locations. Items are resolved through
 * config_item_perf_lookup with journald_gperf_lookup as its table. The
 * return value of config_parse_many() is passed straight to the caller. */
1409 static int server_parse_config_file(Server
*s
) {
1412 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1413 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1415 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback behind s->sync_event_source; server_schedule_sync()
 * registers it with sd_event_add_time().
 *
 * es:       the time event source that fired
 * t:        time value handed in by the event loop
 * userdata: the Server object that was passed at registration time
 *
 * NOTE(review): presumably this performs the deferred sync that
 * server_schedule_sync() arranged - confirm against the function body. */
1419 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1420 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk after a message of the
 * given syslog priority was processed.
 *
 * Messages with priority LOG_CRIT or more severe (numerically <= LOG_CRIT)
 * are treated as a request for an immediate sync. Otherwise, when
 * s->sync_interval_usec is non-zero, a one-shot timer is armed at
 * (current event loop time on CLOCK_MONOTONIC) + s->sync_interval_usec:
 * the time event source is created on first use, with
 * server_dispatch_sync() as callback and SD_EVENT_PRIORITY_IMPORTANT as
 * priority, and an existing source is re-timed and switched to
 * SD_EVENT_ONESHOT. s->sync_scheduled is then set to true.
 *
 * NOTE(review): s->sync_scheduled is tested before a timer is armed;
 * presumably an already scheduled sync makes this a no-op - confirm. */
1428 int server_schedule_sync(Server
*s
, int priority
) {
1433 if (priority
<= LOG_CRIT
) {
1434 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1439 if (s
->sync_scheduled
)
1442 if (s
->sync_interval_usec
> 0) {
/* Base the deadline on the timestamp of the current event loop iteration */
1445 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1449 when
+= s
->sync_interval_usec
;
/* Lazily create the timer source the first time a sync is scheduled */
1451 if (!s
->sync_event_source
) {
1452 r
= sd_event_add_time(
1454 &s
->sync_event_source
,
1457 server_dispatch_sync
, s
);
1461 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1463 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1467 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1472 s
->sync_scheduled
= true;
/* I/O callback for the hostname fd that server_open_hostname() registers.
 *
 * Refreshes the cached hostname of the Server via server_cache_hostname().
 *
 * es:       the I/O event source that fired
 * fd:       the watched file descriptor
 * revents:  event mask reported by the event loop
 * userdata: the Server object that was passed at registration time */
1478 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1479 Server
*s
= userdata
;
1483 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and watches it through the event loop.
 *
 * The file is opened read-only and non-blocking into s->hostname_fd and is
 * then registered as an I/O event source with an event mask of 0 and
 * dispatch_hostname_change() as callback. Afterwards the source gets the
 * priority SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * A failing open() and a failing priority adjustment are logged as errors
 * and returned. For the registration step two outcomes exist: one where
 * the failure is only warned about and the fd is closed again, and one
 * where it is logged as an error and returned.
 *
 * NOTE(review): per the comment in the body the tolerated case concerns
 * kernels older than 3.2, which cannot poll this file; confirm which error
 * code selects it. */
1487 static int server_open_hostname(Server
*s
) {
1492 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1493 if (s
->hostname_fd
< 0)
1494 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
/* Event mask 0: no EPOLLIN/EPOLLOUT is requested for this fd */
1496 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1498 /* kernels prior to 3.2 don't support polling this file. Ignore
1501 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1502 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1506 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1509 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1511 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket; server_connect_notify()
 * registers it for EPOLLOUT.
 *
 * Each invocation sends at most one message with send(..., MSG_DONTWAIT),
 * picked in this order:
 *   1. the initial READY=1 status message, while s->sent_notify_ready is
 *      still false,
 *   2. a WATCHDOG=1 message, while s->send_watchdog is set,
 *   3. one queued stdout stream notification, via
 *      stdout_stream_send_notify().
 * The corresponding flag is updated after a successful send. Other send()
 * failures are logged as errors and returned. The event source stays
 * enabled while a watchdog message or stream notifications remain pending
 * and is switched to SD_EVENT_OFF otherwise.
 *
 * NOTE(review): errno is compared against EAGAIN after each send();
 * presumably that case leaves the message for a later invocation -
 * confirm.
 *
 * es:       the notify event source; must equal s->notify_event_source
 * fd:       the notify socket; must equal s->notify_fd
 * revents:  event mask reported by the event loop
 * userdata: the Server object that was passed at registration time */
1516 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1517 Server
*s
= userdata
;
1521 assert(s
->notify_event_source
== es
);
1522 assert(s
->notify_fd
== fd
);
1524 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1525 * message on it. Either it's the watchdog event, the initial
1526 * READY=1 event or an stdout stream event. If there's nothing
1527 * to write anymore, turn our event source off. The next time
1528 * there's something to send it will be turned on again. */
1530 if (!s
->sent_notify_ready
) {
1531 static const char p
[] =
1533 "STATUS=Processing requests...";
1536 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1538 if (errno
== EAGAIN
)
1541 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1544 s
->sent_notify_ready
= true;
1545 log_debug("Sent READY=1 notification.");
1547 } else if (s
->send_watchdog
) {
1549 static const char p
[] =
1554 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1556 if (errno
== EAGAIN
)
1559 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1562 s
->send_watchdog
= false;
1563 log_debug("Sent WATCHDOG=1 notification.");
1565 } else if (s
->stdout_streams_notify_queue
)
1566 /* Dispatch one stream notification event */
1567 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1569 /* Leave us enabled if there's still more to do. */
1570 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1573 /* There was nothing to do anymore, let's turn ourselves off. */
1574 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1576 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of s->watchdog_event_source; server_connect_notify()
 * registers it.
 *
 * Marks a watchdog message as pending (s->send_watchdog) and switches the
 * notify event source on, so that dispatch_notify_event() can send it.
 * A failure to switch the notify source on is only warned about. The
 * timer is then re-armed at usec + s->watchdog_usec / 2 and enabled again;
 * failures of these two steps are logged as errors and returned.
 *
 * es:       the time event source that fired
 * usec:     time value handed in by the event loop; base for the next
 *           deadline
 * userdata: the Server object that was passed at registration time */
1581 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1582 Server
*s
= userdata
;
1587 s
->send_watchdog
= true;
1589 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1591 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1593 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1595 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1597 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1599 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking datagram socket to $NOTIFY_SOCKET and hooks it
 * into the event loop, so that notification messages can be sent without
 * ever blocking (the rationale is given in the long comment in the body).
 *
 * Steps:
 *   - read NOTIFY_SOCKET from the environment; its value must start with
 *     an at sign or a slash, must be at least two characters long and must
 *     not be longer than sizeof(sa.un.sun_path), otherwise an error is
 *     logged;
 *   - create an AF_UNIX SOCK_DGRAM socket (close-on-exec, non-blocking) in
 *     s->notify_fd and try to enlarge its send buffer to
 *     NOTIFY_SNDBUF_SIZE (result deliberately ignored);
 *   - copy the path into sa.un.sun_path, replacing a leading at sign with
 *     a NUL byte, and connect() with an explicit address length of
 *     offsetof(struct sockaddr_un, sun_path) + strlen(e), which is why the
 *     strncpy() result does not need to be NUL-terminated;
 *   - register the socket for EPOLLOUT with dispatch_notify_event();
 *   - if sd_watchdog_enabled() reports a watchdog, set s->send_watchdog
 *     and add a CLOCK_MONOTONIC timer (dispatch_watchdog) that first fires
 *     at now + s->watchdog_usec/2 with an accuracy of s->watchdog_usec/4.
 *
 * Failures of socket(), connect() and the sd-event calls are logged as
 * errors and returned. Must only be called while s->notify_fd is negative
 * and s->notify_event_source is unset. */
1604 static int server_connect_notify(Server
*s
) {
1605 union sockaddr_union sa
= {
1606 .un
.sun_family
= AF_UNIX
,
1612 assert(s
->notify_fd
< 0);
1613 assert(!s
->notify_event_source
);
1616 So here's the problem: we'd like to send notification
1617 messages to PID 1, but we cannot do that via sd_notify(),
1618 since that's synchronous, and we might end up blocking on
1619 it. Specifically: given that PID 1 might block on
1620 dbus-daemon during IPC, and dbus-daemon is logging to us,
1621 and might hence block on us, we might end up in a deadlock
1622 if we block on sending PID 1 notification messages -- by
1623 generating a full blocking circle. To avoid this, let's
1624 create a non-blocking socket, and connect it to the
1625 notification socket, and then wait for POLLOUT before we
1626 send anything. This should efficiently avoid any deadlocks,
1627 as we'll never block on PID 1, hence PID 1 can safely block
1628 on dbus-daemon which can safely block on us again.
1630 Don't think that this issue is real? It is, see:
1631 https://github.com/systemd/systemd/issues/1505
1634 e
= getenv("NOTIFY_SOCKET");
1638 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1639 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1643 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1644 log_error("NOTIFY_SOCKET path too long: %s", e
);
1648 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1649 if (s
->notify_fd
< 0)
1650 return log_error_errno(errno
, "Failed to create notify socket: %m");
1652 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1654 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1655 if (sa
.un
.sun_path
[0] == '@')
1656 sa
.un
.sun_path
[0] = 0;
1658 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1660 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1662 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1664 return log_error_errno(r
, "Failed to watch notification socket: %m");
1666 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1667 s
->send_watchdog
= true;
1669 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1671 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1674 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server object and brings up everything journald needs
 * before entering its event loop.
 *
 * In order:
 *   - all fd fields are set to -1 and built-in defaults are installed
 *     (sync interval, rate limit, forwarding, maximum levels, metrics);
 *   - the configuration file and the kernel command line are parsed; the
 *     return values of both parsers are not checked here;
 *   - a rate limit where only one of interval and burst is zero is reset
 *     to 0,0;
 *   - /run/systemd/journal is created, and the user journal hashmap, the
 *     mmap cache and the default event loop are set up;
 *   - file descriptors passed in by the service manager (sd_listen_fds())
 *     are classified as native, stdout, syslog or audit socket; receiving
 *     a second socket of the same kind is logged as an error; remaining
 *     fds are offered to server_restore_streams() and whatever is left
 *     over afterwards is closed with a warning;
 *   - the stdout, syslog, native and kmsg inputs are opened, the audit
 *     socket only if one was passed in or no sockets were passed at all;
 *   - kernel sequence number, hostname watch, signal handling, udev, the
 *     rate limiter and the cgroup root are set up, and hostname, boot id
 *     and machine id are cached;
 *   - the notification socket is connected on a best-effort basis (the
 *     result is explicitly discarded).
 *
 * Returns the result of system_journal_open(s, false) when everything
 * before it succeeded; failures of the event loop creation and of
 * sd_listen_fds() are logged as errors and returned. */
1680 int server_init(Server
*s
) {
1681 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* Mark every fd as not open so that server_done() can run safe_close() on them */
1688 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1692 s
->watchdog_usec
= USEC_INFINITY
;
1694 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1695 s
->sync_scheduled
= false;
1697 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1698 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1700 s
->forward_to_wall
= true;
1702 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1704 s
->max_level_store
= LOG_DEBUG
;
1705 s
->max_level_syslog
= LOG_DEBUG
;
1706 s
->max_level_kmsg
= LOG_NOTICE
;
1707 s
->max_level_console
= LOG_INFO
;
1708 s
->max_level_wall
= LOG_EMERG
;
1710 journal_reset_metrics(&s
->system_metrics
);
1711 journal_reset_metrics(&s
->runtime_metrics
);
/* Configuration sources are applied on top of the defaults set above;
 * the kernel command line is parsed after the configuration file */
1713 server_parse_config_file(s
);
1714 server_parse_proc_cmdline(s
);
/* XOR of the two truth values: true when exactly one of interval and
 * burst is zero, in which case both are forced to 0 */
1716 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1717 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1718 s
->rate_limit_interval
, s
->rate_limit_burst
);
1719 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1722 (void) mkdir_p("/run/systemd/journal", 0755);
1724 s
->user_journals
= ordered_hashmap_new(NULL
);
1725 if (!s
->user_journals
)
1728 s
->mmap
= mmap_cache_new();
1732 r
= sd_event_default(&s
->event
);
1734 return log_error_errno(r
, "Failed to create event loop: %m");
/* Collect the file descriptors handed over by the service manager */
1736 n
= sd_listen_fds(true);
1738 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1740 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1742 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1744 if (s
->native_fd
>= 0) {
1745 log_error("Too many native sockets passed.");
1751 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1753 if (s
->stdout_fd
>= 0) {
1754 log_error("Too many stdout sockets passed.");
1760 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1761 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1763 if (s
->syslog_fd
>= 0) {
1764 log_error("Too many /dev/log sockets passed.");
1770 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1772 if (s
->audit_fd
>= 0) {
1773 log_error("Too many audit sockets passed.");
/* NOTE(review): presumably reached for fds that matched none of the
 * socket checks above; they are collected in the FDSet - confirm */
1787 r
= fdset_put(fds
, fd
);
1793 /* Try to restore streams, but don't bother if this fails */
1794 (void) server_restore_streams(s
, fds
);
1796 if (fdset_size(fds
) > 0) {
1797 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1798 fds
= fdset_free(fds
);
/* Remember whether nothing at all was passed in; used for the audit decision below */
1801 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1803 /* always open stdout, syslog, native, and kmsg sockets */
1805 /* systemd-journald.socket: /run/systemd/journal/stdout */
1806 r
= server_open_stdout_socket(s
);
1810 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1811 r
= server_open_syslog_socket(s
);
1815 /* systemd-journald.socket: /run/systemd/journal/socket */
1816 r
= server_open_native_socket(s
);
1821 r
= server_open_dev_kmsg(s
);
1825 /* Unless we got *some* sockets and not audit, open audit socket */
1826 if (s
->audit_fd
>= 0 || no_sockets
) {
1827 r
= server_open_audit(s
);
1832 r
= server_open_kernel_seqnum(s
);
1836 r
= server_open_hostname(s
);
1840 r
= setup_signals(s
);
1844 s
->udev
= udev_new();
1848 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1852 r
= cg_get_root_path(&s
->cgroup_root
);
1856 server_cache_hostname(s
);
1857 server_cache_boot_id(s
);
1858 server_cache_machine_id(s
);
/* Best effort: a missing or broken notification socket must not stop the server */
1860 (void) server_connect_notify(s
);
1862 return system_journal_open(s
, false);
/* Gives every open journal file the chance to append a tag.
 *
 * Takes one CLOCK_REALTIME timestamp and passes it to
 * journal_file_maybe_append_tag() for the system journal (if open) and for
 * each file in s->user_journals, so that all files see the same time. */
1865 void server_maybe_append_tags(Server
*s
) {
1871 n
= now(CLOCK_REALTIME
);
1873 if (s
->system_journal
)
1874 journal_file_maybe_append_tag(s
->system_journal
, n
);
1876 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1877 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server object holds.
 *
 * In order: frees all stdout streams, closes the system, runtime and user
 * journal files and frees the user journal hashmap, drops the references
 * on all event sources and on the event loop, closes all file descriptors,
 * frees the rate limiter, unmaps the kernel sequence number page, frees
 * the cached strings and finally drops the mmap cache and udev references.
 *
 * The Server structure itself is not freed here. */
1881 void server_done(Server
*s
) {
/* stdout_stream_free() is called on the list head until the list is empty */
1885 while (s
->stdout_streams
)
1886 stdout_stream_free(s
->stdout_streams
);
1888 if (s
->system_journal
)
1889 journal_file_close(s
->system_journal
);
1891 if (s
->runtime_journal
)
1892 journal_file_close(s
->runtime_journal
);
/* Drain the hashmap entry by entry so that each file is closed before the map goes away */
1894 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1895 journal_file_close(f
);
1897 ordered_hashmap_free(s
->user_journals
);
/* Event sources are released before the event loop they belong to */
1899 sd_event_source_unref(s
->syslog_event_source
);
1900 sd_event_source_unref(s
->native_event_source
);
1901 sd_event_source_unref(s
->stdout_event_source
);
1902 sd_event_source_unref(s
->dev_kmsg_event_source
);
1903 sd_event_source_unref(s
->audit_event_source
);
1904 sd_event_source_unref(s
->sync_event_source
);
1905 sd_event_source_unref(s
->sigusr1_event_source
);
1906 sd_event_source_unref(s
->sigusr2_event_source
);
1907 sd_event_source_unref(s
->sigterm_event_source
);
1908 sd_event_source_unref(s
->sigint_event_source
);
1909 sd_event_source_unref(s
->sigrtmin1_event_source
);
1910 sd_event_source_unref(s
->hostname_event_source
);
1911 sd_event_source_unref(s
->notify_event_source
);
1912 sd_event_source_unref(s
->watchdog_event_source
);
1913 sd_event_unref(s
->event
);
1915 safe_close(s
->syslog_fd
);
1916 safe_close(s
->native_fd
);
1917 safe_close(s
->stdout_fd
);
1918 safe_close(s
->dev_kmsg_fd
);
1919 safe_close(s
->audit_fd
);
1920 safe_close(s
->hostname_fd
);
1921 safe_close(s
->notify_fd
);
1924 journal_rate_limit_free(s
->rate_limit
);
/* The mapping covers exactly one 64 bit counter */
1926 if (s
->kernel_seqnum
)
1927 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1931 free(s
->cgroup_root
);
1932 free(s
->hostname_field
);
1935 mmap_cache_unref(s
->mmap
);
1937 udev_unref(s
->udev
);
/* Names of the Storage enum values, indexed by enum value.
 *
 * DEFINE_STRING_TABLE_LOOKUP() builds the string lookup helpers for the
 * Storage type on top of this table, and DEFINE_CONFIG_PARSE_ENUM() defines
 * the config parser callback config_parse_storage(), which reports
 * unparsable values with the message given as last argument.
 *
 * NOTE(review): the generated helpers are presumably named
 * storage_to_string() and storage_from_string() - confirm in
 * string-table.h. */
1940 static const char* const storage_table
[_STORAGE_MAX
] = {
1941 [STORAGE_AUTO
] = "auto",
1942 [STORAGE_VOLATILE
] = "volatile",
1943 [STORAGE_PERSISTENT
] = "persistent",
1944 [STORAGE_NONE
] = "none"
1947 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1948 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Names of the SplitMode enum values, indexed by enum value.
 *
 * DEFINE_STRING_TABLE_LOOKUP() builds the string lookup helpers for the
 * SplitMode type on top of this table, and DEFINE_CONFIG_PARSE_ENUM()
 * defines the config parser callback config_parse_split_mode(), which
 * reports unparsable values with the message given as last argument.
 *
 * NOTE(review): the generated helpers are presumably named
 * split_mode_to_string() and split_mode_from_string() - confirm in
 * string-table.h. */
1950 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1951 [SPLIT_LOGIN
] = "login",
1952 [SPLIT_UID
] = "uid",
1953 [SPLIT_NONE
] = "none",
1956 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1957 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");