1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
75 #define USER_JOURNALS_MAX 1024
77 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
78 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
79 #define DEFAULT_RATE_LIMIT_BURST 1000
80 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
82 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
84 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
/* Works out how much disk space the journal directory "path" + machine ID
 * currently occupies and how much more it may use according to "metrics".
 *
 * The directory is scanned and the allocated size (st_blocks * 512) of every
 * regular file named *.journal or *.journal~ is added up. Together with the
 * fstatvfs() figures this gives
 *     limit     = MIN(MAX(sum + avail, min_use), max_use)
 *     available = LESS_BY(limit, sum)
 * which are stored in s->cached_space_limit / s->cached_space_available,
 * stamped with the current monotonic time and copied to *limit / *available.
 * When "verbose" is off and the cached values are younger than
 * RECHECK_SPACE_USEC, the cached values are handed out instead.
 *
 * A failure to open the directory or to fstatvfs() it is logged and the
 * result of the logging call is returned.
 *
 * NOTE(review): several lines of this function (the remaining parameters, the
 * guards around the min_use bump, the usage message and the output stores)
 * are not visible in this excerpt. Callers in this file pass NULL for
 * "available" and "limit", so those stores are presumably NULL-checked --
 * confirm. */
86 static int determine_space_for(
88 JournalMetrics
*metrics
,
96 uint64_t sum
= 0, ss_avail
, avail
;
97 _cleanup_closedir_
DIR *d
= NULL
;
108 ts
= now(CLOCK_MONOTONIC
);
110 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
113 *available
= s
->cached_space_available
;
115 *limit
= s
->cached_space_limit
;
120 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
123 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
125 if (fstatvfs(dirfd(d
), &ss
) < 0)
126 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
128 FOREACH_DIRENT_ALL(de
, d
, break) {
131 if (!endswith(de
->d_name
, ".journal") &&
132 !endswith(de
->d_name
, ".journal~"))
135 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
136 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
140 if (!S_ISREG(st
.st_mode
))
143 sum
+= (uint64_t) st
.st_blocks
* 512UL;
146 /* If requested, then let's bump the min_use limit to the
147 * current usage on disk. We do this when starting up and
148 * first opening the journal files. This way sudden spikes in
149 * disk usage will not cause journald to vacuum files without
150 * bounds. Note that this means that only a restart of
151 * journald will make it reset this value. */
154 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
/* Free bytes reported by the file system, and what is left of them once
 * keep_free has been set aside (via LESS_BY). */
156 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
157 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
159 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
160 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
161 s
->cached_space_timestamp
= ts
;
164 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
165 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
166 format_bytes(fb1
, sizeof(fb1
), sum
);
167 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
168 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
169 format_bytes(fb4
, sizeof(fb4
), ss_avail
);
170 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
);
171 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
);
173 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
174 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
175 name
, path
, fb1
, fb5
, fb6
),
176 "JOURNAL_NAME=%s", name
,
177 "JOURNAL_PATH=%s", path
,
178 "CURRENT_USE=%"PRIu64
, sum
,
179 "CURRENT_USE_PRETTY=%s", fb1
,
180 "MAX_USE=%"PRIu64
, metrics
->max_use
,
181 "MAX_USE_PRETTY=%s", fb2
,
182 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
183 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
184 "DISK_AVAILABLE=%"PRIu64
, ss_avail
,
185 "DISK_AVAILABLE_PRETTY=%s", fb4
,
186 "LIMIT=%"PRIu64
, s
->cached_space_limit
,
187 "LIMIT_PRETTY=%s", fb5
,
188 "AVAILABLE=%"PRIu64
, s
->cached_space_available
,
189 "AVAILABLE_PRETTY=%s", fb6
,
194 *available
= s
->cached_space_available
;
196 *limit
= s
->cached_space_limit
;
/* Convenience wrapper around determine_space_for(): selects the path,
 * metrics and display name of the persistent journal when s->system_journal
 * is open and those of the runtime journal otherwise, then forwards the
 * remaining arguments unchanged and returns that call's result.
 * NOTE(review): the "else" separating the two groups of assignments is on a
 * line not visible in this excerpt. */
201 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
202 JournalMetrics
*metrics
;
203 const char *path
, *name
;
207 if (s
->system_journal
) {
208 path
= "/var/log/journal/";
209 metrics
= &s
->system_metrics
;
210 name
= "System journal";
212 path
= "/run/log/journal/";
213 metrics
= &s
->runtime_metrics
;
214 name
= "Runtime journal";
217 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
/* Gives user "uid" access to the journal file "f" by calling
 * add_acls_for_user() on its file descriptor. A failure is only logged as a
 * warning ("ignoring").
 * NOTE(review): UIDs up to SYSTEM_UID_MAX are special-cased by the first
 * check; the statement it guards is not visible here -- presumably an early
 * return, confirm. */
220 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
227 if (uid
<= SYSTEM_UID_MAX
)
230 r
= add_acls_for_user(f
->fd
, uid
);
232 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Picks the journal file that entries of user "uid" are written to.
 *
 *  - If the runtime journal is open it is always used.
 *  - UIDs up to SYSTEM_UID_MAX go to the system journal.
 *  - Otherwise the per-user file
 *    /var/log/journal/<machine-id>/user-<uid>.journal is looked up in
 *    s->user_journals and opened on demand. Before opening, entries are taken
 *    out of the map and closed while it holds USER_JOURNALS_MAX files or
 *    more. A newly opened file gets ACLs for the user and is put in the map.
 *
 * The system journal is the fallback on the failure paths visible here
 * (path formatting, opening the file, inserting it into the map).
 * NOTE(review): the error checks after sd_id128_get_machine(),
 * ordered_hashmap_get(), journal_file_open_reliably() and
 * ordered_hashmap_put(), as well as the final return, are on lines not
 * visible in this excerpt. */
236 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
237 _cleanup_free_
char *p
= NULL
;
244 /* We split up user logs only on /var, not on /run. If the
245 * runtime file is open, we write to it exclusively, in order
246 * to guarantee proper order as soon as we flush /run to
247 * /var and close the runtime file. */
249 if (s
->runtime_journal
)
250 return s
->runtime_journal
;
252 if (uid
<= SYSTEM_UID_MAX
)
253 return s
->system_journal
;
255 r
= sd_id128_get_machine(&machine
);
257 return s
->system_journal
;
259 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
263 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
264 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
265 return s
->system_journal
;
267 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
268 /* Too many open? Then let's close one */
269 f
= ordered_hashmap_steal_first(s
->user_journals
);
271 journal_file_close(f
);
274 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &f
);
276 return s
->system_journal
;
278 server_add_acls(f
, uid
);
280 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
282 journal_file_close(f
);
283 return s
->system_journal
;
/* Rotates one journal file through journal_file_rotate(), using the
 * server's compression setting and the given "seal" flag. Two different
 * errors are logged on failure: "Failed to rotate" (names the file path) and
 * "Failed to create new ... journal" (names "name"). Afterwards ACLs for
 * "uid" are applied to the resulting file.
 * NOTE(review): the parameter list, the conditions selecting between the two
 * error messages and the return value are on lines not visible here. */
289 static int do_rotate(
302 r
= journal_file_rotate(f
, s
->compress
, seal
);
305 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
307 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
309 server_add_acls(*f
, uid
);
/* Rotates all open journals: the runtime journal (never sealed), the system
 * journal and every per-user journal in s->user_journals (both with
 * s->seal). Results for the runtime and system journals are deliberately
 * discarded. For user journals the map entry is either updated with the
 * file pointer left behind by do_rotate() or removed from the map.
 * NOTE(review): the condition choosing between replace and remove is on a
 * line not visible here; going by the comment, removal covers the case where
 * the old file was closed and no new one exists. */
314 void server_rotate(Server
*s
) {
320 log_debug("Rotating...");
322 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
323 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
325 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
326 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
328 ordered_hashmap_replace(s
->user_journals
, k
, f
);
330 /* Old file has been closed and deallocated */
331 ordered_hashmap_remove(s
->user_journals
, k
);
/* Syncs the journals to disk by calling journal_file_set_offline() on the
 * system journal (if open) and on every user journal; failures are logged as
 * warnings and ignored. Then the pending sync timer, if one exists, is
 * switched off and s->sync_scheduled is cleared.
 * NOTE(review): the "if (r < 0)" checks in front of the log calls are on
 * lines not visible in this excerpt. */
335 void server_sync(Server
*s
) {
340 if (s
->system_journal
) {
341 r
= journal_file_set_offline(s
->system_journal
);
343 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
346 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
347 r
= journal_file_set_offline(f
);
349 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
352 if (s
->sync_event_source
) {
353 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
355 log_error_errno(r
, "Failed to disable sync timer source: %m");
358 s
->sync_scheduled
= false;
/* Vacuums one journal directory ("path" + machine ID). The size limit starts
 * out as metrics->max_use and may be replaced by what determine_space_for()
 * computes (its result is deliberately ignored). The limit, the file-count
 * limit, the retention time and the "verbose" flag are then passed to
 * journal_directory_vacuum(), which also receives &s->oldest_file_usec.
 * Errors other than -ENOENT are logged as warnings and ignored.
 * NOTE(review): most of the parameter list and any early-exit check are on
 * lines not visible here. */
361 static void do_vacuum(
364 JournalMetrics
*metrics
,
368 bool patch_min_use
) {
382 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
384 limit
= metrics
->max_use
;
385 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
387 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
388 if (r
< 0 && r
!= -ENOENT
)
389 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
/* Vacuums both journal directories (persistent, then runtime) via
 * do_vacuum(). s->oldest_file_usec is reset first so the vacuum runs can
 * refill it. The cached space figures are zeroed afterwards, which makes
 * the next determine_space_for() call measure again.
 * NOTE(review): the return statement is not visible in this excerpt. */
392 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
395 log_debug("Vacuuming...");
397 s
->oldest_file_usec
= 0;
399 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
400 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
402 s
->cached_space_limit
= 0;
403 s
->cached_space_available
= 0;
404 s
->cached_space_timestamp
= 0;
/* Queries the machine ID and stores it in s->machine_id_field as a
 * ready-made "_MACHINE_ID=<id>" journal field: stpcpy() writes the prefix
 * and returns the position where sd_id128_to_string() appends the ID.
 * NOTE(review): handling of a failing sd_id128_get_machine() is on lines not
 * visible here. */
409 static void server_cache_machine_id(Server
*s
) {
415 r
= sd_id128_get_machine(&id
);
419 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Queries the boot ID and stores it in s->boot_id_field as a ready-made
 * "_BOOT_ID=<id>" journal field: stpcpy() writes the prefix and returns the
 * position where sd_id128_to_string() appends the ID.
 * NOTE(review): handling of a failing sd_id128_get_boot() is on lines not
 * visible here. */
422 static void server_cache_boot_id(Server
*s
) {
428 r
= sd_id128_get_boot(&id
);
432 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Refreshes s->hostname_field: fetches the current host name, builds the
 * string "_HOSTNAME=<name>" and swaps it in, freeing the previous value.
 * The temporary host name string is released automatically (_cleanup_free_).
 * NOTE(review): the NULL checks after gethostname_malloc() and strappend()
 * are on lines not visible here. */
435 static void server_cache_hostname(Server
*s
) {
436 _cleanup_free_
char *t
= NULL
;
441 t
= gethostname_malloc();
445 x
= strappend("_HOSTNAME=", t
);
449 free(s
->hostname_field
);
450 s
->hostname_field
= x
;
/* Classifies the (negative errno) result "r" of a failed journal write and
 * logs why the file "f" is going to be rotated. The callers in this file
 * retry the write after rotating/vacuuming when this returns true and give
 * up otherwise.
 *
 * Every code tested here is a negative errno value, matching the table
 * below. */
453 static bool shall_try_append_again(JournalFile
*f
, int r
) {
455 /* -E2BIG Hit configured limit
457 -EDQUOT Quota limit hit
459 -EIO I/O error of some kind (mmap)
460 -EHOSTDOWN Other machine
461 -EBUSY Unclean shutdown
462 -EPROTONOSUPPORT Unsupported feature
465 -ESHUTDOWN Already archived
466 -EIDRM Journal file has been deleted */
468 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
469 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
470 else if (r
== -EHOSTDOWN
)
471 log_info("%s: Journal file from other machine, rotating.", f
->path
);
472 else if (r
== -EBUSY
)
473 log_info("%s: Unclean shutdown, rotating.", f
->path
);
474 else if (r
== -EPROTONOSUPPORT
)
475 log_info("%s: Unsupported feature, rotating.", f
->path
);
/* -ESHUTDOWN must carry the minus sign like its neighbours: "r" holds a
 * negative errno, so comparing against the positive constant never
 * matched and an already archived file was not recognised. */
476 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== -ESHUTDOWN
)
477 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
479 log_warning("%s: IO error, rotating.", f
->path
);
480 else if (r
== -EIDRM
)
481 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
/* Appends one entry ("n" iovec elements) to the journal file that
 * find_journal() selects for "uid".
 *
 *  - If journal_file_rotate_suggested() says the file should be rotated,
 *    the journals are vacuumed and the target file is looked up again before
 *    writing.
 *  - After a successful append a sync is scheduled according to "priority".
 *  - If the append fails, the write is retried once after vacuuming,
 *    provided nothing was vacuumed already and shall_try_append_again()
 *    considers the error recoverable; otherwise the error is logged and the
 *    entry is dropped.
 *
 * "n" is unsigned, hence the %u conversions in the error messages.
 *
 * NOTE(review): the rotation calls, the "vacuumed = true" assignment, NULL
 * checks on "f" and the early returns are on lines not visible in this
 * excerpt. */
488 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
490 bool vacuumed
= false;
497 f
= find_journal(s
, uid
);
501 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
502 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
504 server_vacuum(s
, false, false);
507 f
= find_journal(s
, uid
);
512 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
514 server_schedule_sync(s
, priority
);
518 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
519 log_error_errno(r
, "Failed to write entry (%u items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
524 server_vacuum(s
, false, false);
526 f
= find_journal(s
, uid
);
530 log_debug("Retrying write.");
531 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
533 log_error_errno(r
, "Failed to write entry (%u items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
535 server_schedule_sync(s
, priority
);
/* Completes a journal entry with trusted metadata and writes it.
 *
 * "iovec" holds "n" client-supplied fields and has room for "m" elements;
 * the assertion at the top requires space for N_IOVEC_META_FIELDS more, plus
 * N_IOVEC_OBJECT_FIELDS when "object_pid" is set. Fields are appended in
 * this order:
 *
 *  - from "ucred": _PID, _UID, _GID, then _COMM, _EXE, _CMDLINE,
 *    _CAP_EFFECTIVE, _AUDIT_SESSION, _AUDIT_LOGINUID and the cgroup-derived
 *    _SYSTEMD_CGROUP, _SYSTEMD_SESSION, _SYSTEMD_OWNER_UID, _SYSTEMD_UNIT,
 *    _SYSTEMD_USER_UNIT and _SYSTEMD_SLICE ("unit_id" serves as fallback for
 *    the unit fields), and _SELINUX_CONTEXT (from "label" or getpidcon());
 *  - for "object_pid": the corresponding OBJECT_* fields;
 *  - _SOURCE_REALTIME_TIMESTAMP from "tv";
 *  - the cached boot ID, machine ID and host name fields, when non-empty.
 *
 * Finally the UID whose journal receives the entry is chosen according to
 * s->split_mode and the entry goes to write_to_journal().
 *
 * The fixed-size buffers are dimensioned as sizeof(prefix) +
 * DECIMAL_STR_MAX(type), i.e. for the longest decimal rendering of the value
 * written with sprintf().
 *
 * NOTE(review): the surrounding conditions (presence of ucred/tv/label,
 * success checks after each lookup, frees of temporary strings, the
 * remaining parameters and the journal_uid fallbacks) are on lines not
 * visible in this excerpt. */
538 static void dispatch_message_real(
540 struct iovec
*iovec
, unsigned n
, unsigned m
,
541 const struct ucred
*ucred
,
542 const struct timeval
*tv
,
543 const char *label
, size_t label_len
,
548 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
549 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
550 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
551 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
552 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
553 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
554 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
555 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
561 uid_t realuid
= 0, owner
= 0, journal_uid
;
562 bool owner_valid
= false;
564 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
565 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
566 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
567 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
576 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
579 realuid
= ucred
->uid
;
581 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
582 IOVEC_SET_STRING(iovec
[n
++], pid
);
584 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
585 IOVEC_SET_STRING(iovec
[n
++], uid
);
587 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
588 IOVEC_SET_STRING(iovec
[n
++], gid
);
590 r
= get_process_comm(ucred
->pid
, &t
);
592 x
= strjoina("_COMM=", t
);
594 IOVEC_SET_STRING(iovec
[n
++], x
);
597 r
= get_process_exe(ucred
->pid
, &t
);
599 x
= strjoina("_EXE=", t
);
601 IOVEC_SET_STRING(iovec
[n
++], x
);
604 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
606 x
= strjoina("_CMDLINE=", t
);
608 IOVEC_SET_STRING(iovec
[n
++], x
);
611 r
= get_process_capeff(ucred
->pid
, &t
);
613 x
= strjoina("_CAP_EFFECTIVE=", t
);
615 IOVEC_SET_STRING(iovec
[n
++], x
);
619 r
= audit_session_from_pid(ucred
->pid
, &audit
);
621 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
622 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
625 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
627 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
628 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
632 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
634 char *session
= NULL
;
636 x
= strjoina("_SYSTEMD_CGROUP=", c
);
637 IOVEC_SET_STRING(iovec
[n
++], x
);
639 r
= cg_path_get_session(c
, &t
);
641 session
= strjoina("_SYSTEMD_SESSION=", t
);
643 IOVEC_SET_STRING(iovec
[n
++], session
);
646 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
649 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
650 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
653 if (cg_path_get_unit(c
, &t
) >= 0) {
654 x
= strjoina("_SYSTEMD_UNIT=", t
);
656 IOVEC_SET_STRING(iovec
[n
++], x
);
657 } else if (unit_id
&& !session
) {
658 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
659 IOVEC_SET_STRING(iovec
[n
++], x
);
662 if (cg_path_get_user_unit(c
, &t
) >= 0) {
663 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
665 IOVEC_SET_STRING(iovec
[n
++], x
);
666 } else if (unit_id
&& session
) {
667 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
668 IOVEC_SET_STRING(iovec
[n
++], x
);
671 if (cg_path_get_slice(c
, &t
) >= 0) {
672 x
= strjoina("_SYSTEMD_SLICE=", t
);
674 IOVEC_SET_STRING(iovec
[n
++], x
);
678 } else if (unit_id
) {
679 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
680 IOVEC_SET_STRING(iovec
[n
++], x
);
684 if (mac_selinux_have()) {
686 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
688 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
689 IOVEC_SET_STRING(iovec
[n
++], x
);
691 security_context_t con
;
693 if (getpidcon(ucred
->pid
, &con
) >= 0) {
694 x
= strjoina("_SELINUX_CONTEXT=", con
);
697 IOVEC_SET_STRING(iovec
[n
++], x
);
706 r
= get_process_uid(object_pid
, &object_uid
);
708 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
709 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
712 r
= get_process_gid(object_pid
, &object_gid
);
714 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
715 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
718 r
= get_process_comm(object_pid
, &t
);
720 x
= strjoina("OBJECT_COMM=", t
);
722 IOVEC_SET_STRING(iovec
[n
++], x
);
725 r
= get_process_exe(object_pid
, &t
);
727 x
= strjoina("OBJECT_EXE=", t
);
729 IOVEC_SET_STRING(iovec
[n
++], x
);
732 r
= get_process_cmdline(object_pid
, 0, false, &t
);
734 x
= strjoina("OBJECT_CMDLINE=", t
);
736 IOVEC_SET_STRING(iovec
[n
++], x
);
740 r
= audit_session_from_pid(object_pid
, &audit
);
742 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
743 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
746 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
748 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
749 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
753 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
755 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
756 IOVEC_SET_STRING(iovec
[n
++], x
);
758 r
= cg_path_get_session(c
, &t
);
760 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
762 IOVEC_SET_STRING(iovec
[n
++], x
);
765 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
766 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
767 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
770 if (cg_path_get_unit(c
, &t
) >= 0) {
771 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
773 IOVEC_SET_STRING(iovec
[n
++], x
);
776 if (cg_path_get_user_unit(c
, &t
) >= 0) {
777 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
779 IOVEC_SET_STRING(iovec
[n
++], x
);
788 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
789 IOVEC_SET_STRING(iovec
[n
++], source_time
);
792 /* Note that strictly speaking storing the boot id here is
793 * redundant since the entry includes this in-line
794 * anyway. However, we need this indexed, too. */
795 if (!isempty(s
->boot_id_field
))
796 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
798 if (!isempty(s
->machine_id_field
))
799 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
801 if (!isempty(s
->hostname_field
))
802 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
806 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
807 /* Split up strictly by any UID */
808 journal_uid
= realuid
;
809 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
810 /* Split up by login UIDs. We do this only if the
811 * realuid is not root, in order not to accidentally
812 * leak privileged information to the user that is
813 * logged by a privileged process that is part of an
814 * unprivileged session. */
819 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
/* Logs a message that originates from journald itself.
 *
 * The entry starts with the fixed fields SYSLOG_FACILITY=3,
 * SYSLOG_IDENTIFIER=systemd-journald, PRIORITY=6 and _TRANSPORT=driver,
 * followed by a MESSAGE_ID field unless "message_id" is the null ID. The
 * printf-style "format" and its variadic arguments are expanded into further
 * fields by log_format_iovec(). The entry is dispatched with this process's
 * own pid/uid/gid as credentials and priority LOG_INFO.
 *
 * The "+ 5" in the iovec dimension accounts for the five fields set directly
 * in this function; "mid" holds "MESSAGE_ID=" (11 chars), 32 ID characters
 * and the terminating NUL.
 *
 * NOTE(review): the va_end() call and the loop around the final free() --
 * which determines exactly which iov_base pointers are released -- are on
 * lines not visible in this excerpt. */
822 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
823 char mid
[11 + 32 + 1];
824 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
827 struct ucred ucred
= {};
832 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
833 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
835 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
836 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
838 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
839 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
840 IOVEC_SET_STRING(iovec
[n
++], mid
);
845 va_start(ap
, format
);
846 assert_se(log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
) >= 0);
849 ucred
.pid
= getpid();
850 ucred
.uid
= getuid();
851 ucred
.gid
= getgid();
853 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
856 free(iovec
[m
++].iov_base
);
/* Entry point for messages received from clients: applies filtering and rate
 * limiting, then hands the entry to dispatch_message_real().
 *
 *  - Messages whose priority is above s->max_level_store are filtered out,
 *    and so is everything when storage is STORAGE_NONE.
 *  - The sender's cgroup path, cut off after its third '/', serves as the
 *    rate-limit key; the currently available journal space is passed to
 *    journal_rate_limit_test() as well.
 *  - When the rate limiter reports previously suppressed messages, a
 *    "Suppressed N messages" driver message is written first.
 *
 * NOTE(review): the early returns, the check that "ucred" is present, the
 * nested NULL checks around the strchr() calls and the tests on "rl" are on
 * lines not visible in this excerpt. */
859 void server_dispatch_message(
861 struct iovec
*iovec
, unsigned n
, unsigned m
,
862 const struct ucred
*ucred
,
863 const struct timeval
*tv
,
864 const char *label
, size_t label_len
,
870 _cleanup_free_
char *path
= NULL
;
871 uint64_t available
= 0;
875 assert(iovec
|| n
== 0);
880 if (LOG_PRI(priority
) > s
->max_level_store
)
883 /* Stop early in case the information will not be stored
885 if (s
->storage
== STORAGE_NONE
)
891 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
895 /* example: /user/lennart/3/foobar
896 * /system/dbus.service/foobar
898 * So let's cut of everything past the third /, since that is
899 * where user directories start */
901 c
= strchr(path
, '/');
903 c
= strchr(c
+1, '/');
905 c
= strchr(c
+1, '/');
911 (void) determine_space(s
, false, false, &available
, NULL
);
912 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
916 /* Write a suppression message if we suppressed something */
918 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
919 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
923 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
/* Opens the journal files the server writes to.
 *
 * Persistent journal: tried when it is not open yet, storage is PERSISTENT
 * or AUTO, and a condition involving the /run/systemd/journal/flushed flag
 * file holds (see NOTE). In persistent mode /var/log/journal is created
 * first; the per-machine directory is created in both modes (both creation
 * results are ignored). On success ACLs are applied and disk usage is
 * measured verbosely with the min_use bump enabled. Open errors other than
 * -ENOENT and -EROFS are logged as warnings.
 *
 * Runtime journal: handled when it is not open yet and storage is not NONE.
 * If the persistent journal is open, the runtime file is only opened when it
 * already exists, so that it can be flushed; otherwise /run/log/journal is
 * created as needed and the file is opened with O_CREAT, a failure being
 * returned as an error.
 *
 * NOTE(review): part of the first condition (presumably testing
 * "flush_requested"), the success/failure branching after each open call and
 * the final return are on lines not visible in this excerpt. */
927 static int system_journal_open(Server
*s
, bool flush_requested
) {
931 if (!s
->system_journal
&&
932 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
934 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
936 /* If in auto mode: first try to create the machine
937 * path, but not the prefix.
939 * If in persistent mode: create /var/log/journal and
940 * the machine path */
942 if (s
->storage
== STORAGE_PERSISTENT
)
943 (void) mkdir_p("/var/log/journal/", 0755);
945 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
946 (void) mkdir(fn
, 0755);
948 fn
= strjoina(fn
, "/system.journal");
949 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
951 server_add_acls(s
->system_journal
, 0);
952 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
954 if (r
!= -ENOENT
&& r
!= -EROFS
)
955 log_warning_errno(r
, "Failed to open system journal: %m");
961 if (!s
->runtime_journal
&&
962 (s
->storage
!= STORAGE_NONE
)) {
964 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
966 if (s
->system_journal
) {
968 /* Try to open the runtime journal, but only
969 * if it already exists, so that we can flush
970 * it into the system journal */
972 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
975 log_warning_errno(r
, "Failed to open runtime journal: %m");
982 /* OK, we really need the runtime journal, so create
983 * it if necessary. */
985 (void) mkdir("/run/log", 0755);
986 (void) mkdir("/run/log/journal", 0755);
987 (void) mkdir_parents(fn
, 0750);
989 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
991 return log_error_errno(r
, "Failed to open runtime journal: %m");
994 if (s
->runtime_journal
) {
995 server_add_acls(s
->runtime_journal
, 0);
996 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
/* Moves the contents of the runtime journal (/run) into the persistent
 * system journal (/var).
 *
 * Storage must be AUTO or PERSISTENT and a runtime journal must be open --
 * both are checked up front. The system journal is then opened with the
 * flush flag set. All runtime entries are read through an sd_journal handle
 * opened with SD_JOURNAL_RUNTIME_ONLY (data threshold disabled) and copied
 * one by one with journal_file_copy_entry(). If a copy fails with an error
 * that shall_try_append_again() accepts, the journals are vacuumed and the
 * copy is retried once.
 *
 * Afterwards the runtime journal is closed, /run/log/journal is removed, the
 * reader is closed and a driver message reports the time spent.
 *
 * NOTE(review): the early returns, the rotation call before vacuuming, the
 * entry counter, the "finish" label/cleanup ordering and the return value
 * are on lines not visible in this excerpt. */
1003 int server_flush_to_var(Server
*s
) {
1005 sd_journal
*j
= NULL
;
1006 char ts
[FORMAT_TIMESPAN_MAX
];
1013 if (s
->storage
!= STORAGE_AUTO
&&
1014 s
->storage
!= STORAGE_PERSISTENT
)
1017 if (!s
->runtime_journal
)
1020 (void) system_journal_open(s
, true);
1022 if (!s
->system_journal
)
1025 log_debug("Flushing to /var...");
1027 start
= now(CLOCK_MONOTONIC
);
1029 r
= sd_id128_get_machine(&machine
);
1033 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1035 return log_error_errno(r
, "Failed to read runtime journal: %m");
1037 sd_journal_set_data_threshold(j
, 0);
1039 SD_JOURNAL_FOREACH(j
) {
1043 f
= j
->current_file
;
1044 assert(f
&& f
->current_offset
> 0);
1048 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1050 log_error_errno(r
, "Can't read entry: %m");
1054 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1058 if (!shall_try_append_again(s
->system_journal
, r
)) {
1059 log_error_errno(r
, "Can't write entry: %m");
1064 server_vacuum(s
, false, false);
1066 if (!s
->system_journal
) {
1067 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1072 log_debug("Retrying write.");
1073 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1075 log_error_errno(r
, "Can't write entry: %m");
1083 journal_file_post_change(s
->system_journal
);
1085 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1088 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1090 sd_journal_close(j
);
1092 server_driver_message(s
, SD_ID128_NULL
,
1093 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1094 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* sd-event I/O callback for the datagram sockets (native, syslog, audit);
 * "userdata" is the Server.
 *
 * Steps visible here:
 *  - Only a plain EPOLLIN event is accepted; anything else is logged as an
 *    error.
 *  - The pending datagram size is queried with SIOCINQ (best effort) and
 *    s->buffer is grown to a page-aligned size that also covers the audit
 *    message case. One byte is kept back so that a trailing NUL can be
 *    added after receiving.
 *  - recvmsg() is called non-blocking with MSG_CMSG_CLOEXEC. EINTR and
 *    EAGAIN are treated separately from other errors, which are logged and
 *    returned.
 *  - The control messages are scanned for sender credentials, the SELinux
 *    label, the receive timestamp and passed file descriptors.
 *  - The payload goes to the syslog, native or audit handler depending on
 *    "fd". The native socket additionally accepts a single file descriptor
 *    with an empty payload; other combinations are logged as warnings.
 *  - All received file descriptors are closed at the end.
 *
 * NOTE(review): the declarations of several locals (control union, iovec,
 * n, n_fds, label), the NUL-termination statement, the "else" keywords in
 * front of the warnings and the return statements are on lines not visible
 * in this excerpt. */
1101 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1102 Server
*s
= userdata
;
1103 struct ucred
*ucred
= NULL
;
1104 struct timeval
*tv
= NULL
;
1105 struct cmsghdr
*cmsg
;
1107 size_t label_len
= 0, m
;
1110 int *fds
= NULL
, v
= 0;
1114 struct cmsghdr cmsghdr
;
1116 /* We use NAME_MAX space for the SELinux label
1117 * here. The kernel currently enforces no
1118 * limit, but according to suggestions from
1119 * the SELinux people this will change and it
1120 * will probably be identical to NAME_MAX. For
1121 * now we use that, but this should be updated
1122 * one day when the final limit is known. */
1123 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1124 CMSG_SPACE(sizeof(struct timeval
)) +
1125 CMSG_SPACE(sizeof(int)) + /* fd */
1126 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1129 union sockaddr_union sa
= {};
1131 struct msghdr msghdr
= {
1134 .msg_control
= &control
,
1135 .msg_controllen
= sizeof(control
),
1137 .msg_namelen
= sizeof(sa
),
1141 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1143 if (revents
!= EPOLLIN
) {
1144 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1148 /* Try to get the right size, if we can. (Not all
1149 * sockets support SIOCINQ, hence we just try, but
1150 * don't rely on it. */
1151 (void) ioctl(fd
, SIOCINQ
, &v
);
1153 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1154 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1156 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1158 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1161 iovec
.iov_base
= s
->buffer
;
1162 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1164 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1166 if (errno
== EINTR
|| errno
== EAGAIN
)
1169 return log_error_errno(errno
, "recvmsg() failed: %m");
1172 CMSG_FOREACH(cmsg
, &msghdr
) {
1174 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1175 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1176 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1177 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1178 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1179 cmsg
->cmsg_type
== SCM_SECURITY
) {
1180 label
= (char*) CMSG_DATA(cmsg
);
1181 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1182 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1183 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1184 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1185 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1186 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1187 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1188 fds
= (int*) CMSG_DATA(cmsg
);
1189 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1193 /* And a trailing NUL, just in case */
1196 if (fd
== s
->syslog_fd
) {
1197 if (n
> 0 && n_fds
== 0)
1198 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1200 log_warning("Got file descriptors via syslog socket. Ignoring.");
1202 } else if (fd
== s
->native_fd
) {
1203 if (n
> 0 && n_fds
== 0)
1204 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1205 else if (n
== 0 && n_fds
== 1)
1206 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1208 log_warning("Got too many file descriptors via native socket. Ignoring.");
1211 assert(fd
== s
->audit_fd
);
1213 if (n
> 0 && n_fds
== 0)
1214 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1216 log_warning("Got file descriptors via audit socket. Ignoring.");
1219 close_many(fds
, n_fds
);
/* SIGUSR1 handler (installed by setup_signals()): flushes the runtime
 * journal to /var, vacuums, and then touches /run/systemd/journal/flushed.
 * A failure to touch the flag file is only logged as a warning.
 * NOTE(review): other statements of the body and the return value are on
 * lines not visible in this excerpt. */
1223 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1224 Server
*s
= userdata
;
1229 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1231 server_flush_to_var(s
);
1233 server_vacuum(s
, false, false);
1235 r
= touch("/run/systemd/journal/flushed");
1237 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
/* SIGUSR2 handler (installed by setup_signals()): handles a rotation
 * request. Visible here: the journals are vacuumed verbosely with the
 * min_use bump enabled, and the current monotonic time is written to
 * /run/systemd/journal/rotated so that clients can tell when the last
 * rotation happened. A failure to write that file is only logged.
 * NOTE(review): the rotation call itself and the return value are on lines
 * not visible in this excerpt. */
1242 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1243 Server
*s
= userdata
;
1248 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1250 server_vacuum(s
, true, true);
1252 /* Let clients know when the most recent rotation happened. */
1253 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1255 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Termination handler, registered by setup_signals() for both SIGTERM and
 * SIGINT: logs the received signal and asks the event loop to exit with
 * code 0.
 * NOTE(review): the return value is on a line not visible here. */
1260 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1261 Server
*s
= userdata
;
1265 log_received_signal(LOG_INFO
, si
);
1267 sd_event_exit(s
->event
, 0);
/* SIGRTMIN+1 handler (installed by setup_signals()): handles a sync
 * request. Visible here: the request is logged and the current monotonic
 * time is written to /run/systemd/journal/synced so that clients can tell
 * when the last sync happened; a failure to write that file is only logged.
 * NOTE(review): the sync call itself and the return value are on lines not
 * visible in this excerpt. */
1271 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1272 Server
*s
= userdata
;
1277 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1281 /* Let clients know when the most recent sync happened. */
1282 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1284 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Sets up signal handling for the server's event loop.
 *
 * The signal mask is first set for SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and
 * SIGRTMIN+1; then one sd-event signal source is registered per signal:
 *
 *   SIGUSR1      -> dispatch_sigusr1   (flush runtime journal to /var)
 *   SIGUSR2      -> dispatch_sigusr2   (rotate)
 *   SIGTERM      -> dispatch_sigterm   (exit)
 *   SIGINT       -> dispatch_sigterm   (exit, for use on a terminal)
 *   SIGRTMIN+1   -> dispatch_sigrtmin1 (sync)
 *
 * The termination sources run at SD_EVENT_PRIORITY_NORMAL+20 and the sync
 * source at +15, so that they are processed after everything else already
 * queued.
 *
 * NOTE(review): the "if (r < 0) return r;" checks after each call and the
 * final return are on lines not visible in this excerpt. */
1289 static int setup_signals(Server
*s
) {
/* assert_se(), not assert(): the call has a side effect (it installs the
 * signal mask) and therefore must still be executed when plain assertions
 * are compiled out with NDEBUG. */
1294 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1296 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1300 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1304 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1308 /* Let's process SIGTERM late, so that we flush all queued
1309 * messages to disk before we exit */
1310 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1314 /* When journald is invoked on the terminal (when debugging),
1315 * it's useful if C-c is handled equivalent to SIGTERM. */
1316 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1320 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1324 /* SIGRTMIN+1 causes an immediate sync. We process this very
1325 * late, so that everything else queued at this point is
1326 * really written to disk. Clients can watch
1327 * /run/systemd/journal/synced with inotify until its mtime
1328 * changes to see when a sync happened. */
1329 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1333 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald options given on the kernel command line.
 *
 * The command line is read with proc_cmdline() (a read failure is logged and
 * ignored) and split into words with extract_first_word(). Four boolean
 * switches are recognised and stored in the Server:
 *
 *   systemd.journald.forward_to_syslog=   -> s->forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> s->forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> s->forward_to_console
 *   systemd.journald.forward_to_wall=     -> s->forward_to_wall
 *
 * The numeric offsets added to "word" (35, 33, 36, 33) are the lengths of
 * the respective option prefixes, i.e. they skip to the value after '='.
 * A value that does not parse as a boolean is warned about, as is any other
 * word starting with "systemd.journald".
 *
 * NOTE(review): the loop construct, the "if (r < 0)" / "else" lines around
 * the assignments and the return statements are on lines not visible in
 * this excerpt. */
1340 static int server_parse_proc_cmdline(Server
*s
) {
1341 _cleanup_free_
char *line
= NULL
;
1345 r
= proc_cmdline(&line
);
1347 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1353 _cleanup_free_
char *word
= NULL
;
1355 r
= extract_first_word(&p
, &word
, NULL
, 0);
1357 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1362 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1363 r
= parse_boolean(word
+ 35);
1365 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1367 s
->forward_to_syslog
= r
;
1368 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1369 r
= parse_boolean(word
+ 33);
1371 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1373 s
->forward_to_kmsg
= r
;
1374 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1375 r
= parse_boolean(word
+ 36);
1377 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1379 s
->forward_to_console
= r
;
1380 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1381 r
= parse_boolean(word
+ 33);
1383 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1385 s
->forward_to_wall
= r
;
1386 } else if (startswith(word
, "systemd.journald"))
1387 log_warning("Invalid systemd.journald parameter. Ignoring.");
1390 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration: hands PKGSYSCONFDIR "/journald.conf"
 * and the paths produced by CONF_PATHS_NULSTR("systemd/journald.conf.d") to
 * config_parse_many(), using config_item_perf_lookup together with the
 * journald_gperf_lookup table, and returns the result of that call.
 *
 * NOTE(review): the remaining arguments of the config_parse_many() call
 * are not visible in this listing. */
1394 static int server_parse_config_file(Server
*s
) {
1397 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1398 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1400 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback of the sync event source; server_schedule_sync() installs
 * it through sd_event_add_time() with the Server as userdata.
 *
 * NOTE(review): nothing beyond the userdata assignment is visible in this
 * listing -- presumably the callback carries out the deferred sync; confirm
 * against the complete function. */
1404 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1405 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk, depending on the priority
 * of the message that was just processed.
 *
 * Priorities of LOG_CRIT and more severe (numerically <= LOG_CRIT) take the
 * immediate path. For everything else there is a check for an already
 * scheduled sync, and, if sync_interval_usec is non-zero, the sync timer is
 * set to the event loop's current CLOCK_MONOTONIC time plus
 * sync_interval_usec. The timer is created on first use with
 * server_dispatch_sync() as callback and given priority
 * SD_EVENT_PRIORITY_IMPORTANT; an existing timer is re-timed instead. It is
 * enabled as a one-shot source and sync_scheduled is set.
 *
 * NOTE(review): declarations, the body of the immediate branch, the error
 * checks and the return statements are not visible in this listing, so the
 * exact branch structure above is inferred from statement order -- confirm
 * against the complete function. */
1413 int server_schedule_sync(Server
*s
, int priority
) {
1418 if (priority
<= LOG_CRIT
) {
1419 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1424 if (s
->sync_scheduled
)
1427 if (s
->sync_interval_usec
> 0) {
1430 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1434 when
+= s
->sync_interval_usec
;
1436 if (!s
->sync_event_source
) {
1437 r
= sd_event_add_time(
1439 &s
->sync_event_source
,
1442 server_dispatch_sync
, s
);
1446 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1448 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1452 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1457 s
->sync_scheduled
= true;
/* I/O callback that server_open_hostname() attaches to the
 * /proc/sys/kernel/hostname descriptor: refreshes the hostname cached in
 * the Server passed as userdata by calling server_cache_hostname(). */
1463 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1464 Server
*s
= userdata
;
1468 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking,
 * never becoming the controlling terminal) and registers the descriptor
 * with the event loop so that dispatch_hostname_change() is invoked for it;
 * the event mask passed to sd_event_add_io() is 0. The event source is then
 * given priority SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * Failing to open the file, or to adjust the priority, is logged as an
 * error and returned. For a failed registration two outcomes are visible:
 * one logs a warning and closes the descriptor again (the comment in the
 * body relates this to kernels older than 3.2, which cannot poll the file),
 * the other logs an error and returns it.
 *
 * NOTE(review): the condition that selects between those two outcomes is
 * not visible in this listing -- confirm against the complete function. */
1472 static int server_open_hostname(Server
*s
) {
1477 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1478 if (s
->hostname_fd
< 0)
1479 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1481 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1483 /* kernels prior to 3.2 don't support polling this file. Ignore
1486 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1487 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1491 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1494 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1496 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback of the notification socket; server_connect_notify()
 * registers it for EPOLLOUT with the Server as userdata.
 *
 * Each invocation handles at most one pending item, in this order of
 * precedence: the initial readiness message (as long as sent_notify_ready
 * is unset), a watchdog message (when send_watchdog is set), or one queued
 * stdout stream notification. Messages go out with send(..., MSG_DONTWAIT);
 * EAGAIN is checked for separately, other send errors are logged and
 * returned.
 *
 * Afterwards the watchdog flag and the stream notification queue are
 * checked for remaining work; when there is nothing left to do, the event
 * source turns itself off.
 *
 * NOTE(review): parts of the message literals, the declarations and the
 * return statements are not visible in this listing. */
1501 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1502 Server
*s
= userdata
;
1506 assert(s
->notify_event_source
== es
);
1507 assert(s
->notify_fd
== fd
);
1509 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1510 * message on it. Either it's the watchdog event, the initial
1511 * READY=1 event or an stdout stream event. If there's nothing
1512 * to write anymore, turn our event source off. The next time
1513 * there's something to send it will be turned on again. */
1515 if (!s
->sent_notify_ready
) {
1516 static const char p
[] =
1518 "STATUS=Processing requests...";
1521 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1523 if (errno
== EAGAIN
)
1526 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1529 s
->sent_notify_ready
= true;
1530 log_debug("Sent READY=1 notification.");
1532 } else if (s
->send_watchdog
) {
1534 static const char p
[] =
1539 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1541 if (errno
== EAGAIN
)
1544 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1547 s
->send_watchdog
= false;
1548 log_debug("Sent WATCHDOG=1 notification.");
1550 } else if (s
->stdout_streams_notify_queue
)
1551 /* Dispatch one stream notification event */
1552 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1554 /* Leave us enabled if there's still more to do. */
1555 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1558 /* There was nothing to do anymore, let's turn ourselves off. */
1559 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1561 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of the watchdog event source (added by
 * server_connect_notify()), with the Server as userdata.
 *
 * Marks a watchdog message as due by setting send_watchdog and switches the
 * notify event source on, so that dispatch_notify_event() gets to send it.
 * The timer is then moved to the usec value passed to this callback plus
 * half of watchdog_usec, and enabled again.
 *
 * A failure to switch on the notify source is only logged as a warning;
 * failures to re-time or re-enable the watchdog timer are logged as errors
 * and returned. */
1566 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1567 Server
*s
= userdata
;
1572 s
->send_watchdog
= true;
1574 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1576 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1578 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1580 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1582 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1584 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Sets up the channel used to send notification messages without ever
 * blocking on the receiver (the rationale is spelled out in the body).
 *
 * Reads the address from $NOTIFY_SOCKET. It has to start with '@' or '/'
 * and be longer than one character, and must not exceed the size of
 * sun_path; violations are logged as errors. A non-blocking, close-on-exec
 * AF_UNIX datagram socket is created, its send buffer is enlarged to
 * NOTIFY_SNDBUF_SIZE on a best-effort basis, a leading '@' of the address
 * is replaced by a NUL byte, and the socket is connected. The socket is
 * then watched for EPOLLOUT with dispatch_notify_event() as callback.
 *
 * If sd_watchdog_enabled() reports an active watchdog, send_watchdog is set
 * and a CLOCK_MONOTONIC timer running dispatch_watchdog() is added; it
 * first fires at now + watchdog_usec/2, with an accuracy of
 * watchdog_usec/4.
 *
 * Expects notify_fd to be unset (< 0) and no notify event source to exist
 * yet; both are asserted. Failures of socket(), connect() and of adding
 * the event sources are logged as errors and returned.
 *
 * NOTE(review): the handling of an unset $NOTIFY_SOCKET, the values
 * returned for invalid addresses and the final return are not visible in
 * this listing. */
1589 static int server_connect_notify(Server
*s
) {
1590 union sockaddr_union sa
= {
1591 .un
.sun_family
= AF_UNIX
,
1597 assert(s
->notify_fd
< 0);
1598 assert(!s
->notify_event_source
);
1601 So here's the problem: we'd like to send notification
1602 messages to PID 1, but we cannot do that via sd_notify(),
1603 since that's synchronous, and we might end up blocking on
1604 it. Specifically: given that PID 1 might block on
1605 dbus-daemon during IPC, and dbus-daemon is logging to us,
1606 and might hence block on us, we might end up in a deadlock
1607 if we block on sending PID 1 notification messages -- by
1608 generating a full blocking circle. To avoid this, let's
1609 create a non-blocking socket, and connect it to the
1610 notification socket, and then wait for POLLOUT before we
1611 send anything. This should efficiently avoid any deadlocks,
1612 as we'll never block on PID 1, hence PID 1 can safely block
1613 on dbus-daemon which can safely block on us again.
1615 Don't think that this issue is real? It is, see:
1616 https://github.com/systemd/systemd/issues/1505
1619 e
= getenv("NOTIFY_SOCKET");
1623 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1624 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1628 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1629 log_error("NOTIFY_SOCKET path too long: %s", e
);
1633 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1634 if (s
->notify_fd
< 0)
1635 return log_error_errno(errno
, "Failed to create notify socket: %m");
1637 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1639 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1640 if (sa
.un
.sun_path
[0] == '@')
1641 sa
.un
.sun_path
[0] = 0;
1643 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1645 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1647 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1649 return log_error_errno(r
, "Failed to watch notification socket: %m");
1651 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1652 s
->send_watchdog
= true;
1654 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1656 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1659 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server and brings up everything journald needs before it
 * starts processing events.
 *
 * Steps visible here, in order:
 *
 *  - all file descriptor fields are set to -1 and defaults are filled in:
 *    watchdog interval, sync interval, rate limit interval and burst,
 *    forwarding to wall, maximum file age, the maximum log level per
 *    target, and the system/runtime journal metrics;
 *  - the configuration file and then the kernel command line are parsed;
 *  - if exactly one of rate_limit_interval and rate_limit_burst is zero,
 *    both are reset to zero;
 *  - /run/systemd/journal is created (best effort), and the user journal
 *    hashmap, the mmap cache and the default event loop are set up;
 *  - the descriptors reported by sd_listen_fds() are matched against the
 *    native, stdout, syslog and audit sockets, more than one of a kind
 *    being an error; other descriptors appear to be collected in an FDSet,
 *    from which restoring streams is attempted (best effort), and whatever
 *    is left over is reported with a warning and released;
 *  - the stdout, syslog, native and kmsg inputs are opened; the audit
 *    input only if an audit socket was passed in or no socket was passed
 *    in at all;
 *  - the kernel sequence number, the hostname watch and the signal
 *    handlers are set up, followed by udev, the rate limiter and the
 *    cgroup root path; hostname, boot ID and machine ID are cached and the
 *    notification socket is connected (best effort);
 *  - finally the result of system_journal_open(s, false) is returned.
 *
 * NOTE(review): declarations and most error checks are not visible in this
 * listing, and neither are the statements storing the matched descriptors
 * -- confirm details against the complete function. */
1665 int server_init(Server
*s
) {
1666 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1673 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1677 s
->watchdog_usec
= USEC_INFINITY
;
1679 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1680 s
->sync_scheduled
= false;
1682 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1683 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1685 s
->forward_to_wall
= true;
1687 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1689 s
->max_level_store
= LOG_DEBUG
;
1690 s
->max_level_syslog
= LOG_DEBUG
;
1691 s
->max_level_kmsg
= LOG_NOTICE
;
1692 s
->max_level_console
= LOG_INFO
;
1693 s
->max_level_wall
= LOG_EMERG
;
1695 journal_reset_metrics(&s
->system_metrics
);
1696 journal_reset_metrics(&s
->runtime_metrics
);
1698 server_parse_config_file(s
);
1699 server_parse_proc_cmdline(s
);
1701 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1702 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1703 s
->rate_limit_interval
, s
->rate_limit_burst
);
1704 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1707 (void) mkdir_p("/run/systemd/journal", 0755);
1709 s
->user_journals
= ordered_hashmap_new(NULL
);
1710 if (!s
->user_journals
)
1713 s
->mmap
= mmap_cache_new();
1717 r
= sd_event_default(&s
->event
);
1719 return log_error_errno(r
, "Failed to create event loop: %m");
1721 n
= sd_listen_fds(true);
1723 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1725 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1727 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1729 if (s
->native_fd
>= 0) {
1730 log_error("Too many native sockets passed.");
1736 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1738 if (s
->stdout_fd
>= 0) {
1739 log_error("Too many stdout sockets passed.");
1745 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1746 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1748 if (s
->syslog_fd
>= 0) {
1749 log_error("Too many /dev/log sockets passed.");
1755 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1757 if (s
->audit_fd
>= 0) {
1758 log_error("Too many audit sockets passed.");
1772 r
= fdset_put(fds
, fd
);
1778 /* Try to restore streams, but don't bother if this fails */
1779 (void) server_restore_streams(s
, fds
);
1781 if (fdset_size(fds
) > 0) {
1782 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1783 fds
= fdset_free(fds
);
1786 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1788 /* always open stdout, syslog, native, and kmsg sockets */
1790 /* systemd-journald.socket: /run/systemd/journal/stdout */
1791 r
= server_open_stdout_socket(s
);
1795 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1796 r
= server_open_syslog_socket(s
);
1800 /* systemd-journald.socket: /run/systemd/journal/socket */
1801 r
= server_open_native_socket(s
);
1806 r
= server_open_dev_kmsg(s
);
1810 /* Unless we got *some* sockets and not audit, open audit socket */
1811 if (s
->audit_fd
>= 0 || no_sockets
) {
1812 r
= server_open_audit(s
);
1817 r
= server_open_kernel_seqnum(s
);
1821 r
= server_open_hostname(s
);
1825 r
= setup_signals(s
);
1829 s
->udev
= udev_new();
1833 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1837 r
= cg_get_root_path(&s
->cgroup_root
);
1841 server_cache_hostname(s
);
1842 server_cache_boot_id(s
);
1843 server_cache_machine_id(s
);
1845 (void) server_connect_notify(s
);
1847 return system_journal_open(s
, false);
/* Takes the current CLOCK_REALTIME timestamp and passes it to
 * journal_file_maybe_append_tag() for the system journal, if one is open,
 * and for every journal file stored in user_journals.
 *
 * NOTE(review): the declarations of n, f and i, and anything that may
 * surround the body, are not visible in this listing. */
1850 void server_maybe_append_tags(Server
*s
) {
1856 n
= now(CLOCK_REALTIME
);
1858 if (s
->system_journal
)
1859 journal_file_maybe_append_tag(s
->system_journal
, n
);
1861 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1862 journal_file_maybe_append_tag(f
, n
);
/* Releases the resources held by a Server.
 *
 * In order: frees every stdout stream; closes the system and runtime
 * journals if open, and every journal taken out of user_journals, then
 * frees that hashmap; drops the references to all event sources and to the
 * event loop; closes the syslog, native, stdout, kmsg, audit, hostname and
 * notify descriptors; frees the rate limiter; unmaps kernel_seqnum if it
 * is mapped; frees cgroup_root and hostname_field; and drops the
 * references to the mmap cache and the udev context.
 *
 * NOTE(review): several statements of this function are not visible in
 * this listing (e.g. the declaration of f and possibly further cleanup
 * calls or guards) -- confirm against the complete function. */
1866 void server_done(Server
*s
) {
1870 while (s
->stdout_streams
)
1871 stdout_stream_free(s
->stdout_streams
);
1873 if (s
->system_journal
)
1874 journal_file_close(s
->system_journal
);
1876 if (s
->runtime_journal
)
1877 journal_file_close(s
->runtime_journal
);
1879 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1880 journal_file_close(f
);
1882 ordered_hashmap_free(s
->user_journals
);
1884 sd_event_source_unref(s
->syslog_event_source
);
1885 sd_event_source_unref(s
->native_event_source
);
1886 sd_event_source_unref(s
->stdout_event_source
);
1887 sd_event_source_unref(s
->dev_kmsg_event_source
);
1888 sd_event_source_unref(s
->audit_event_source
);
1889 sd_event_source_unref(s
->sync_event_source
);
1890 sd_event_source_unref(s
->sigusr1_event_source
);
1891 sd_event_source_unref(s
->sigusr2_event_source
);
1892 sd_event_source_unref(s
->sigterm_event_source
);
1893 sd_event_source_unref(s
->sigint_event_source
);
1894 sd_event_source_unref(s
->sigrtmin1_event_source
);
1895 sd_event_source_unref(s
->hostname_event_source
);
1896 sd_event_source_unref(s
->notify_event_source
);
1897 sd_event_source_unref(s
->watchdog_event_source
);
1898 sd_event_unref(s
->event
);
1900 safe_close(s
->syslog_fd
);
1901 safe_close(s
->native_fd
);
1902 safe_close(s
->stdout_fd
);
1903 safe_close(s
->dev_kmsg_fd
);
1904 safe_close(s
->audit_fd
);
1905 safe_close(s
->hostname_fd
);
1906 safe_close(s
->notify_fd
);
1909 journal_rate_limit_free(s
->rate_limit
);
1911 if (s
->kernel_seqnum
)
1912 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1916 free(s
->cgroup_root
);
1917 free(s
->hostname_field
);
1920 mmap_cache_unref(s
->mmap
);
1922 udev_unref(s
->udev
);
/* Names of the Storage enum values: "auto", "volatile", "persistent" and
 * "none". The two macro invocations that follow are given the "storage"
 * name stem and the Storage type; presumably they generate the
 * string<->enum lookup helpers and the config_parse_storage() parser on
 * top of this table -- confirm against the macro definitions. */
1925 static const char* const storage_table
[_STORAGE_MAX
] = {
1926 [STORAGE_AUTO
] = "auto",
1927 [STORAGE_VOLATILE
] = "volatile",
1928 [STORAGE_PERSISTENT
] = "persistent",
1929 [STORAGE_NONE
] = "none"
1932 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1933 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Names of the SplitMode enum values: "login", "uid" and "none". The two
 * macro invocations that follow are given the "split_mode" name stem and
 * the SplitMode type; presumably they generate the string<->enum lookup
 * helpers and the config_parse_split_mode() parser on top of this table --
 * confirm against the macro definitions. */
1935 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1936 [SPLIT_LOGIN
] = "login",
1937 [SPLIT_UID
] = "uid",
1938 [SPLIT_NONE
] = "none",
1941 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1942 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");