1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
/* Upper bound on the number of per-user journal files kept open at once:
 * find_journal() closes already-open user journals until fewer than this
 * many remain before it opens another one. */
76 #define USER_JOURNALS_MAX 1024
/* Built-in default values. NOTE(review): their users are not in the visible
 * part of this file; presumably they seed the Server configuration before
 * the config file and kernel command line are parsed -- confirm. */
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* How long the space figures cached by determine_space_for() are reused
 * before the journal directory is scanned again (non-verbose calls only). */
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
/* NOTE(review): not referenced in the visible part of this file; by its name
 * a socket send-buffer size in bytes (8 MiB) -- confirm at the use site. */
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90 static int determine_space_for(
92 JournalMetrics
*metrics
,
100 uint64_t sum
= 0, ss_avail
, avail
;
101 _cleanup_closedir_
DIR *d
= NULL
;
112 ts
= now(CLOCK_MONOTONIC
);
114 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
117 *available
= s
->cached_space_available
;
119 *limit
= s
->cached_space_limit
;
124 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
127 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
129 if (fstatvfs(dirfd(d
), &ss
) < 0)
130 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
132 FOREACH_DIRENT_ALL(de
, d
, break) {
135 if (!endswith(de
->d_name
, ".journal") &&
136 !endswith(de
->d_name
, ".journal~"))
139 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
140 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
144 if (!S_ISREG(st
.st_mode
))
147 sum
+= (uint64_t) st
.st_blocks
* 512UL;
150 /* If requested, then let's bump the min_use limit to the
151 * current usage on disk. We do this when starting up and
152 * first opening the journal files. This way sudden spikes in
153 * disk usage will not cause journald to vacuum files without
154 * bounds. Note that this means that only a restart of
155 * journald will make it reset this value. */
158 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
160 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
161 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
163 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
164 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
165 s
->cached_space_timestamp
= ts
;
168 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
169 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
170 format_bytes(fb1
, sizeof(fb1
), sum
);
171 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
172 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
173 format_bytes(fb4
, sizeof(fb4
), ss_avail
);
174 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
);
175 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
);
177 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
178 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
179 name
, path
, fb1
, fb5
, fb6
),
180 "JOURNAL_NAME=%s", name
,
181 "JOURNAL_PATH=%s", path
,
182 "CURRENT_USE=%"PRIu64
, sum
,
183 "CURRENT_USE_PRETTY=%s", fb1
,
184 "MAX_USE=%"PRIu64
, metrics
->max_use
,
185 "MAX_USE_PRETTY=%s", fb2
,
186 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
187 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
188 "DISK_AVAILABLE=%"PRIu64
, ss_avail
,
189 "DISK_AVAILABLE_PRETTY=%s", fb4
,
190 "LIMIT=%"PRIu64
, s
->cached_space_limit
,
191 "LIMIT_PRETTY=%s", fb5
,
192 "AVAILABLE=%"PRIu64
, s
->cached_space_available
,
193 "AVAILABLE_PRETTY=%s", fb6
,
198 *available
= s
->cached_space_available
;
200 *limit
= s
->cached_space_limit
;
205 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
206 JournalMetrics
*metrics
;
207 const char *path
, *name
;
211 if (s
->system_journal
) {
212 path
= "/var/log/journal/";
213 metrics
= &s
->system_metrics
;
214 name
= "System journal";
216 path
= "/run/log/journal/";
217 metrics
= &s
->runtime_metrics
;
218 name
= "Runtime journal";
221 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
224 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
231 if (uid
<= SYSTEM_UID_MAX
)
234 r
= add_acls_for_user(f
->fd
, uid
);
236 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
240 static int open_journal(
246 JournalMetrics
*metrics
,
256 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, NULL
, &f
);
258 r
= journal_file_open(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, NULL
, &f
);
262 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
264 journal_file_close(f
);
272 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
273 _cleanup_free_
char *p
= NULL
;
280 /* We split up user logs only on /var, not on /run. If the
281 * runtime file is open, we write to it exclusively, in order
282 * to guarantee proper order as soon as we flush /run to
283 * /var and close the runtime file. */
285 if (s
->runtime_journal
)
286 return s
->runtime_journal
;
288 if (uid
<= SYSTEM_UID_MAX
)
289 return s
->system_journal
;
291 r
= sd_id128_get_machine(&machine
);
293 return s
->system_journal
;
295 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
299 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
300 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
301 return s
->system_journal
;
303 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
304 /* Too many open? Then let's close one */
305 f
= ordered_hashmap_steal_first(s
->user_journals
);
307 journal_file_close(f
);
310 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &f
);
312 return s
->system_journal
;
314 server_add_acls(f
, uid
);
316 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
318 journal_file_close(f
);
319 return s
->system_journal
;
325 static int do_rotate(
338 r
= journal_file_rotate(f
, s
->compress
, seal
);
341 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
343 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
345 server_add_acls(*f
, uid
);
350 void server_rotate(Server
*s
) {
356 log_debug("Rotating...");
358 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
359 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
361 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
362 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
364 ordered_hashmap_replace(s
->user_journals
, k
, f
);
366 /* Old file has been closed and deallocated */
367 ordered_hashmap_remove(s
->user_journals
, k
);
371 void server_sync(Server
*s
) {
376 if (s
->system_journal
) {
377 r
= journal_file_set_offline(s
->system_journal
);
379 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
382 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
383 r
= journal_file_set_offline(f
);
385 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
388 if (s
->sync_event_source
) {
389 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
391 log_error_errno(r
, "Failed to disable sync timer source: %m");
394 s
->sync_scheduled
= false;
397 static void do_vacuum(
400 JournalMetrics
*metrics
,
404 bool patch_min_use
) {
418 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
420 limit
= metrics
->max_use
;
421 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
423 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
424 if (r
< 0 && r
!= -ENOENT
)
425 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
428 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
431 log_debug("Vacuuming...");
433 s
->oldest_file_usec
= 0;
435 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
436 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
438 s
->cached_space_limit
= 0;
439 s
->cached_space_available
= 0;
440 s
->cached_space_timestamp
= 0;
445 static void server_cache_machine_id(Server
*s
) {
451 r
= sd_id128_get_machine(&id
);
455 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
458 static void server_cache_boot_id(Server
*s
) {
464 r
= sd_id128_get_boot(&id
);
468 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
471 static void server_cache_hostname(Server
*s
) {
472 _cleanup_free_
char *t
= NULL
;
477 t
= gethostname_malloc();
481 x
= strappend("_HOSTNAME=", t
);
485 free(s
->hostname_field
);
486 s
->hostname_field
= x
;
489 static bool shall_try_append_again(JournalFile
*f
, int r
) {
491 /* -E2BIG Hit configured limit
493 -EDQUOT Quota limit hit
495 -EIO I/O error of some kind (mmap)
496 -EHOSTDOWN Other machine
497 -EBUSY Unclean shutdown
498 -EPROTONOSUPPORT Unsupported feature
501 -ESHUTDOWN Already archived
502 -EIDRM Journal file has been deleted */
504 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
505 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
506 else if (r
== -EHOSTDOWN
)
507 log_info("%s: Journal file from other machine, rotating.", f
->path
);
508 else if (r
== -EBUSY
)
509 log_info("%s: Unclean shutdown, rotating.", f
->path
);
510 else if (r
== -EPROTONOSUPPORT
)
511 log_info("%s: Unsupported feature, rotating.", f
->path
);
512 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
513 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
515 log_warning("%s: IO error, rotating.", f
->path
);
516 else if (r
== -EIDRM
)
517 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
524 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
526 bool vacuumed
= false;
533 f
= find_journal(s
, uid
);
537 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
538 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
540 server_vacuum(s
, false, false);
543 f
= find_journal(s
, uid
);
548 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
550 server_schedule_sync(s
, priority
);
554 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
555 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
560 server_vacuum(s
, false, false);
562 f
= find_journal(s
, uid
);
566 log_debug("Retrying write.");
567 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
569 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
571 server_schedule_sync(s
, priority
);
574 static void dispatch_message_real(
576 struct iovec
*iovec
, unsigned n
, unsigned m
,
577 const struct ucred
*ucred
,
578 const struct timeval
*tv
,
579 const char *label
, size_t label_len
,
584 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
585 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
586 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
587 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
588 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
589 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
590 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
591 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
597 uid_t realuid
= 0, owner
= 0, journal_uid
;
598 bool owner_valid
= false;
600 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
601 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
602 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
603 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
612 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
615 realuid
= ucred
->uid
;
617 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
618 IOVEC_SET_STRING(iovec
[n
++], pid
);
620 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
621 IOVEC_SET_STRING(iovec
[n
++], uid
);
623 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
624 IOVEC_SET_STRING(iovec
[n
++], gid
);
626 r
= get_process_comm(ucred
->pid
, &t
);
628 x
= strjoina("_COMM=", t
);
630 IOVEC_SET_STRING(iovec
[n
++], x
);
633 r
= get_process_exe(ucred
->pid
, &t
);
635 x
= strjoina("_EXE=", t
);
637 IOVEC_SET_STRING(iovec
[n
++], x
);
640 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
642 x
= strjoina("_CMDLINE=", t
);
644 IOVEC_SET_STRING(iovec
[n
++], x
);
647 r
= get_process_capeff(ucred
->pid
, &t
);
649 x
= strjoina("_CAP_EFFECTIVE=", t
);
651 IOVEC_SET_STRING(iovec
[n
++], x
);
655 r
= audit_session_from_pid(ucred
->pid
, &audit
);
657 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
658 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
661 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
663 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
664 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
668 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
670 char *session
= NULL
;
672 x
= strjoina("_SYSTEMD_CGROUP=", c
);
673 IOVEC_SET_STRING(iovec
[n
++], x
);
675 r
= cg_path_get_session(c
, &t
);
677 session
= strjoina("_SYSTEMD_SESSION=", t
);
679 IOVEC_SET_STRING(iovec
[n
++], session
);
682 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
685 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
686 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
689 if (cg_path_get_unit(c
, &t
) >= 0) {
690 x
= strjoina("_SYSTEMD_UNIT=", t
);
692 IOVEC_SET_STRING(iovec
[n
++], x
);
693 } else if (unit_id
&& !session
) {
694 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
695 IOVEC_SET_STRING(iovec
[n
++], x
);
698 if (cg_path_get_user_unit(c
, &t
) >= 0) {
699 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
701 IOVEC_SET_STRING(iovec
[n
++], x
);
702 } else if (unit_id
&& session
) {
703 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
704 IOVEC_SET_STRING(iovec
[n
++], x
);
707 if (cg_path_get_slice(c
, &t
) >= 0) {
708 x
= strjoina("_SYSTEMD_SLICE=", t
);
710 IOVEC_SET_STRING(iovec
[n
++], x
);
714 } else if (unit_id
) {
715 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
716 IOVEC_SET_STRING(iovec
[n
++], x
);
720 if (mac_selinux_have()) {
722 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
724 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
725 IOVEC_SET_STRING(iovec
[n
++], x
);
727 security_context_t con
;
729 if (getpidcon(ucred
->pid
, &con
) >= 0) {
730 x
= strjoina("_SELINUX_CONTEXT=", con
);
733 IOVEC_SET_STRING(iovec
[n
++], x
);
742 r
= get_process_uid(object_pid
, &object_uid
);
744 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
745 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
748 r
= get_process_gid(object_pid
, &object_gid
);
750 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
751 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
754 r
= get_process_comm(object_pid
, &t
);
756 x
= strjoina("OBJECT_COMM=", t
);
758 IOVEC_SET_STRING(iovec
[n
++], x
);
761 r
= get_process_exe(object_pid
, &t
);
763 x
= strjoina("OBJECT_EXE=", t
);
765 IOVEC_SET_STRING(iovec
[n
++], x
);
768 r
= get_process_cmdline(object_pid
, 0, false, &t
);
770 x
= strjoina("OBJECT_CMDLINE=", t
);
772 IOVEC_SET_STRING(iovec
[n
++], x
);
776 r
= audit_session_from_pid(object_pid
, &audit
);
778 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
779 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
782 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
784 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
785 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
789 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
791 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
792 IOVEC_SET_STRING(iovec
[n
++], x
);
794 r
= cg_path_get_session(c
, &t
);
796 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
798 IOVEC_SET_STRING(iovec
[n
++], x
);
801 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
802 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
803 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
806 if (cg_path_get_unit(c
, &t
) >= 0) {
807 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
809 IOVEC_SET_STRING(iovec
[n
++], x
);
812 if (cg_path_get_user_unit(c
, &t
) >= 0) {
813 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
815 IOVEC_SET_STRING(iovec
[n
++], x
);
824 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
825 IOVEC_SET_STRING(iovec
[n
++], source_time
);
828 /* Note that strictly speaking storing the boot id here is
829 * redundant since the entry includes this in-line
830 * anyway. However, we need this indexed, too. */
831 if (!isempty(s
->boot_id_field
))
832 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
834 if (!isempty(s
->machine_id_field
))
835 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
837 if (!isempty(s
->hostname_field
))
838 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
842 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
843 /* Split up strictly by any UID */
844 journal_uid
= realuid
;
845 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
846 /* Split up by login UIDs. We do this only if the
847 * realuid is not root, in order not to accidentally
848 * leak privileged information to the user that is
849 * logged by a privileged process that is part of an
850 * unprivileged session. */
855 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
858 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
859 char mid
[11 + 32 + 1];
860 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
864 struct ucred ucred
= {};
869 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
870 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
871 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
873 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
874 assert_cc(6 == LOG_INFO
);
875 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
877 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
878 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
879 IOVEC_SET_STRING(iovec
[n
++], mid
);
884 va_start(ap
, format
);
885 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
886 /* Error handling below */
889 ucred
.pid
= getpid();
890 ucred
.uid
= getuid();
891 ucred
.gid
= getgid();
894 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
897 free(iovec
[m
++].iov_base
);
900 /* We failed to format the message. Emit a warning instead. */
903 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
906 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
907 IOVEC_SET_STRING(iovec
[n
++], buf
);
908 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
912 void server_dispatch_message(
914 struct iovec
*iovec
, unsigned n
, unsigned m
,
915 const struct ucred
*ucred
,
916 const struct timeval
*tv
,
917 const char *label
, size_t label_len
,
923 _cleanup_free_
char *path
= NULL
;
924 uint64_t available
= 0;
928 assert(iovec
|| n
== 0);
933 if (LOG_PRI(priority
) > s
->max_level_store
)
936 /* Stop early in case the information will not be stored
938 if (s
->storage
== STORAGE_NONE
)
944 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
948 /* example: /user/lennart/3/foobar
949 * /system/dbus.service/foobar
951 * So let's cut off everything past the third /, since that is
952 * where user directories start */
954 c
= strchr(path
, '/');
956 c
= strchr(c
+1, '/');
958 c
= strchr(c
+1, '/');
964 (void) determine_space(s
, false, false, &available
, NULL
);
965 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
969 /* Write a suppression message if we suppressed something */
971 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
972 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
976 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
980 static int system_journal_open(Server
*s
, bool flush_requested
) {
984 if (!s
->system_journal
&&
985 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
987 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
989 /* If in auto mode: first try to create the machine
990 * path, but not the prefix.
992 * If in persistent mode: create /var/log/journal and
993 * the machine path */
995 if (s
->storage
== STORAGE_PERSISTENT
)
996 (void) mkdir_p("/var/log/journal/", 0755);
998 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
999 (void) mkdir(fn
, 0755);
1001 fn
= strjoina(fn
, "/system.journal");
1002 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &s
->system_journal
);
1004 server_add_acls(s
->system_journal
, 0);
1005 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
1007 if (r
!= -ENOENT
&& r
!= -EROFS
)
1008 log_warning_errno(r
, "Failed to open system journal: %m");
1014 if (!s
->runtime_journal
&&
1015 (s
->storage
!= STORAGE_NONE
)) {
1017 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
1019 if (s
->system_journal
) {
1021 /* Try to open the runtime journal, but only
1022 * if it already exists, so that we can flush
1023 * it into the system journal */
1025 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1028 log_warning_errno(r
, "Failed to open runtime journal: %m");
1035 /* OK, we really need the runtime journal, so create
1036 * it if necessary. */
1038 (void) mkdir("/run/log", 0755);
1039 (void) mkdir("/run/log/journal", 0755);
1040 (void) mkdir_parents(fn
, 0750);
1042 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1044 return log_error_errno(r
, "Failed to open runtime journal: %m");
1047 if (s
->runtime_journal
) {
1048 server_add_acls(s
->runtime_journal
, 0);
1049 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1056 int server_flush_to_var(Server
*s
) {
1058 sd_journal
*j
= NULL
;
1059 char ts
[FORMAT_TIMESPAN_MAX
];
1066 if (s
->storage
!= STORAGE_AUTO
&&
1067 s
->storage
!= STORAGE_PERSISTENT
)
1070 if (!s
->runtime_journal
)
1073 (void) system_journal_open(s
, true);
1075 if (!s
->system_journal
)
1078 log_debug("Flushing to /var...");
1080 start
= now(CLOCK_MONOTONIC
);
1082 r
= sd_id128_get_machine(&machine
);
1086 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1088 return log_error_errno(r
, "Failed to read runtime journal: %m");
1090 sd_journal_set_data_threshold(j
, 0);
1092 SD_JOURNAL_FOREACH(j
) {
1096 f
= j
->current_file
;
1097 assert(f
&& f
->current_offset
> 0);
1101 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1103 log_error_errno(r
, "Can't read entry: %m");
1107 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1111 if (!shall_try_append_again(s
->system_journal
, r
)) {
1112 log_error_errno(r
, "Can't write entry: %m");
1117 server_vacuum(s
, false, false);
1119 if (!s
->system_journal
) {
1120 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1125 log_debug("Retrying write.");
1126 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1128 log_error_errno(r
, "Can't write entry: %m");
1136 journal_file_post_change(s
->system_journal
);
1138 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1141 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1143 sd_journal_close(j
);
1145 server_driver_message(s
, SD_ID128_NULL
,
1146 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1147 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1154 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1155 Server
*s
= userdata
;
1156 struct ucred
*ucred
= NULL
;
1157 struct timeval
*tv
= NULL
;
1158 struct cmsghdr
*cmsg
;
1160 size_t label_len
= 0, m
;
1163 int *fds
= NULL
, v
= 0;
1167 struct cmsghdr cmsghdr
;
1169 /* We use NAME_MAX space for the SELinux label
1170 * here. The kernel currently enforces no
1171 * limit, but according to suggestions from
1172 * the SELinux people this will change and it
1173 * will probably be identical to NAME_MAX. For
1174 * now we use that, but this should be updated
1175 * one day when the final limit is known. */
1176 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1177 CMSG_SPACE(sizeof(struct timeval
)) +
1178 CMSG_SPACE(sizeof(int)) + /* fd */
1179 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1182 union sockaddr_union sa
= {};
1184 struct msghdr msghdr
= {
1187 .msg_control
= &control
,
1188 .msg_controllen
= sizeof(control
),
1190 .msg_namelen
= sizeof(sa
),
1194 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1196 if (revents
!= EPOLLIN
) {
1197 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1201 /* Try to get the right size, if we can. (Not all
1202 * sockets support SIOCINQ, hence we just try, but
1203 * don't rely on it.) */
1204 (void) ioctl(fd
, SIOCINQ
, &v
);
1206 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1207 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1209 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1211 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1214 iovec
.iov_base
= s
->buffer
;
1215 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1217 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1219 if (errno
== EINTR
|| errno
== EAGAIN
)
1222 return log_error_errno(errno
, "recvmsg() failed: %m");
1225 CMSG_FOREACH(cmsg
, &msghdr
) {
1227 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1228 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1229 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1230 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1231 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1232 cmsg
->cmsg_type
== SCM_SECURITY
) {
1233 label
= (char*) CMSG_DATA(cmsg
);
1234 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1235 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1236 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1237 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1238 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1239 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1240 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1241 fds
= (int*) CMSG_DATA(cmsg
);
1242 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1246 /* And a trailing NUL, just in case */
1249 if (fd
== s
->syslog_fd
) {
1250 if (n
> 0 && n_fds
== 0)
1251 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1253 log_warning("Got file descriptors via syslog socket. Ignoring.");
1255 } else if (fd
== s
->native_fd
) {
1256 if (n
> 0 && n_fds
== 0)
1257 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1258 else if (n
== 0 && n_fds
== 1)
1259 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1261 log_warning("Got too many file descriptors via native socket. Ignoring.");
1264 assert(fd
== s
->audit_fd
);
1266 if (n
> 0 && n_fds
== 0)
1267 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1269 log_warning("Got file descriptors via audit socket. Ignoring.");
1272 close_many(fds
, n_fds
);
1276 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1277 Server
*s
= userdata
;
1282 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1284 server_flush_to_var(s
);
1286 server_vacuum(s
, false, false);
1288 r
= touch("/run/systemd/journal/flushed");
1290 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1295 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1296 Server
*s
= userdata
;
1301 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1303 server_vacuum(s
, true, true);
1305 /* Let clients know when the most recent rotation happened. */
1306 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1308 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1313 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1314 Server
*s
= userdata
;
1318 log_received_signal(LOG_INFO
, si
);
1320 sd_event_exit(s
->event
, 0);
1324 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1325 Server
*s
= userdata
;
1330 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1334 /* Let clients know when the most recent sync happened. */
1335 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1337 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1342 static int setup_signals(Server
*s
) {
1347 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1349 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1353 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1357 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1361 /* Let's process SIGTERM late, so that we flush all queued
1362 * messages to disk before we exit */
1363 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1367 /* When journald is invoked on the terminal (when debugging),
1368 * it's useful if C-c is handled equivalent to SIGTERM. */
1369 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1373 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1377 /* SIGRTMIN+1 causes an immediate sync. We process this very
1378 * late, so that everything else queued at this point is
1379 * really written to disk. Clients can watch
1380 * /run/systemd/journal/synced with inotify until its mtime
1381 * changes to see when a sync happened. */
1382 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1386 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1393 static int server_parse_proc_cmdline(Server
*s
) {
1394 _cleanup_free_
char *line
= NULL
;
1398 r
= proc_cmdline(&line
);
1400 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1406 _cleanup_free_
char *word
= NULL
;
1408 r
= extract_first_word(&p
, &word
, NULL
, 0);
1410 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1415 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1416 r
= parse_boolean(word
+ 35);
1418 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1420 s
->forward_to_syslog
= r
;
1421 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1422 r
= parse_boolean(word
+ 33);
1424 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1426 s
->forward_to_kmsg
= r
;
1427 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1428 r
= parse_boolean(word
+ 36);
1430 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1432 s
->forward_to_console
= r
;
1433 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1434 r
= parse_boolean(word
+ 33);
1436 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1438 s
->forward_to_wall
= r
;
1439 } else if (startswith(word
, "systemd.journald"))
1440 log_warning("Invalid systemd.journald parameter. Ignoring.");
1443 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration: hands PKGSYSCONFDIR "/journald.conf"
 * and the "systemd/journald.conf.d" drop-in directories to
 * config_parse_many(), using journald_gperf_lookup as the lookup table,
 * and returns that call's result.
 *
 * NOTE(review): the argument list is cut off in this listing (embedded
 * lines 1452 and 1454 onwards are missing), so the remaining arguments and
 * the end of the function are not visible -- confirm against the complete
 * file. */
1447 static int server_parse_config_file(Server
*s
) {
1450 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1451 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1453 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback of the deferred-sync event source:
 * server_schedule_sync() passes it to sd_event_add_time() with the Server
 * as userdata.
 *
 * NOTE(review): only the signature and the userdata assignment are visible
 * in this listing (the embedded numbering jumps from 1458 to 1466); what
 * the callback does and returns is on lines that were dropped -- confirm
 * against the complete file. */
1457 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1458 Server
*s
= userdata
;
/* Requests a journal sync on behalf of a message with the given syslog
 * priority.
 *
 *  - priority <= LOG_CRIT (CRIT, ALERT, EMERG) takes the "sync
 *    immediately" branch.
 *  - Otherwise there is a separate check of s->sync_scheduled.
 *  - If s->sync_interval_usec > 0, the target time is the event loop's
 *    CLOCK_MONOTONIC "now" plus sync_interval_usec. When no
 *    s->sync_event_source exists yet, a timer source with callback
 *    server_dispatch_sync is created and given
 *    SD_EVENT_PRIORITY_IMPORTANT. The calls that re-time the source and
 *    re-enable it as SD_EVENT_ONESHOT presumably belong to the
 *    alternative branch for an already existing source. Finally
 *    s->sync_scheduled is set.
 *
 * NOTE(review): this listing skips original lines (see the gaps in the
 * embedded numbering, e.g. 1472 -> 1477 -> 1480 and 1490 -> 1492 -> 1495).
 * The declarations of r and when, the bodies of the first two checks,
 * several sd_event_add_time() arguments, the error checks, the else and
 * the return statements are not visible -- confirm against the complete
 * file. */
1466 int server_schedule_sync(Server
*s
, int priority
) {
1471 if (priority
<= LOG_CRIT
) {
1472 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1477 if (s
->sync_scheduled
)
1480 if (s
->sync_interval_usec
> 0) {
1483 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1487 when
+= s
->sync_interval_usec
;
1489 if (!s
->sync_event_source
) {
1490 r
= sd_event_add_time(
1492 &s
->sync_event_source
,
1495 server_dispatch_sync
, s
);
1499 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1501 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1505 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1510 s
->sync_scheduled
= true;
1516 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1517 Server
*s
= userdata
;
1521 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY)
 * into s->hostname_fd and registers that fd on s->event with
 * dispatch_hostname_change() as callback; the resulting event source is
 * given priority SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * Failing to open the file or to adjust the priority is logged as an error
 * and returned. For a failed registration two outcomes are visible: one
 * logs a warning ("ignoring") and closes the fd again, the other logs an
 * error and returns it. The in-code comment relates the tolerated case to
 * kernels older than 3.2, which cannot poll this file.
 *
 * NOTE(review): this listing skips original lines (embedded numbering
 * 1534 -> 1536 -> 1539, 1540 -> 1544). The condition that selects the
 * tolerated case, the end of the in-code comment, the error checks and the
 * return statements are not visible -- confirm against the complete
 * file. */
1525 static int server_open_hostname(Server
*s
) {
1530 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1531 if (s
->hostname_fd
< 0)
1532 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1534 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1536 /* kernels prior to 3.2 don't support polling this file. Ignore
1539 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1540 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1544 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1547 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1549 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1554 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1555 Server
*s
= userdata
;
1559 assert(s
->notify_event_source
== es
);
1560 assert(s
->notify_fd
== fd
);
1562 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1563 * message on it. Either it's the wtachdog event, the initial
1564 * READY=1 event or an stdout stream event. If there's nothing
1565 * to write anymore, turn our event source off. The next time
1566 * there's something to send it will be turned on again. */
1568 if (!s
->sent_notify_ready
) {
1569 static const char p
[] =
1571 "STATUS=Processing requests...";
1574 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1576 if (errno
== EAGAIN
)
1579 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1582 s
->sent_notify_ready
= true;
1583 log_debug("Sent READY=1 notification.");
1585 } else if (s
->send_watchdog
) {
1587 static const char p
[] =
1592 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1594 if (errno
== EAGAIN
)
1597 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1600 s
->send_watchdog
= false;
1601 log_debug("Sent WATCHDOG=1 notification.");
1603 } else if (s
->stdout_streams_notify_queue
)
1604 /* Dispatch one stream notification event */
1605 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1607 /* Leave us enabled if there's still more to to do. */
1608 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1611 /* There was nothing to do anymore, let's turn ourselves off. */
1612 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1614 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1619 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1620 Server
*s
= userdata
;
1625 s
->send_watchdog
= true;
1627 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1629 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1631 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1633 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1635 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1637 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1642 static int server_connect_notify(Server
*s
) {
1643 union sockaddr_union sa
= {
1644 .un
.sun_family
= AF_UNIX
,
1650 assert(s
->notify_fd
< 0);
1651 assert(!s
->notify_event_source
);
1654 So here's the problem: we'd like to send notification
1655 messages to PID 1, but we cannot do that via sd_notify(),
1656 since that's synchronous, and we might end up blocking on
1657 it. Specifically: given that PID 1 might block on
1658 dbus-daemon during IPC, and dbus-daemon is logging to us,
1659 and might hence block on us, we might end up in a deadlock
1660 if we block on sending PID 1 notification messages -- by
1661 generating a full blocking circle. To avoid this, let's
1662 create a non-blocking socket, and connect it to the
1663 notification socket, and then wait for POLLOUT before we
1664 send anything. This should efficiently avoid any deadlocks,
1665 as we'll never block on PID 1, hence PID 1 can safely block
1666 on dbus-daemon which can safely block on us again.
1668 Don't think that this issue is real? It is, see:
1669 https://github.com/systemd/systemd/issues/1505
1672 e
= getenv("NOTIFY_SOCKET");
1676 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1677 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1681 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1682 log_error("NOTIFY_SOCKET path too long: %s", e
);
1686 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1687 if (s
->notify_fd
< 0)
1688 return log_error_errno(errno
, "Failed to create notify socket: %m");
1690 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1692 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1693 if (sa
.un
.sun_path
[0] == '@')
1694 sa
.un
.sun_path
[0] = 0;
1696 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1698 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1700 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1702 return log_error_errno(r
, "Failed to watch notification socket: %m");
1704 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1705 s
->send_watchdog
= true;
1707 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1709 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1712 /* This should fire pretty soon, which we'll use to send the
/* Initializes *s and brings the server up. Steps visible in this listing,
 * in order:
 *
 *  1. All fd fields (syslog, native, stdout, dev_kmsg, audit, hostname,
 *     notify) are set to -1 and the built-in defaults are installed:
 *     watchdog interval, sync interval, rate limit interval/burst,
 *     forwarding to wall, maximum file age, the per-target maximum log
 *     levels, and both metrics sets.
 *  2. The configuration file and the kernel command line are applied; the
 *     return values of both calls are not used here.
 *  3. If exactly one of rate_limit_interval and rate_limit_burst is zero,
 *     both are reset to zero (with a debug message).
 *  4. /run/systemd/journal is created (result ignored), then the user
 *     journal hashmap, the mmap cache and the default event loop.
 *  5. The fds received through sd_listen_fds() are matched against the
 *     native socket, the stdout socket, the /dev/log sockets and an
 *     AF_NETLINK raw socket (audit); a second fd of the same kind is
 *     logged as an error. There is also an fdset_put() of fd into fds,
 *     which presumably handles fds matching none of the above. fds is
 *     handed to server_restore_streams() (failure ignored), and if it is
 *     still non-empty afterwards it is freed with a warning.
 *  6. The stdout, syslog, native and kmsg inputs are opened; audit is
 *     opened only if an audit fd was passed in or no sockets were passed
 *     at all. Then come the kernel sequence number, the hostname watch
 *     and signal handling.
 *  7. The udev context and the rate limiter are created, the cgroup root
 *     path is looked up, hostname, boot ID and machine ID are cached, and
 *     the notify socket is connected (result explicitly ignored).
 *
 * On the path visible at the end the function returns the result of
 * system_journal_open(s, false); failures of the event loop and of
 * sd_listen_fds() are logged and returned.
 *
 * NOTE(review): this listing skips many original lines (see the gaps in
 * the embedded numbering, e.g. 1719 -> 1726 -> 1730). The declarations of
 * n, r, fd and no_sockets, anything done to *s before or right after the
 * fd fields are set, the error checks after most calls, the assignments of
 * the recognized fds and all remaining return statements are not visible
 * -- confirm against the complete file. */
1718 int server_init(Server
*s
) {
1719 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1726 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1730 s
->watchdog_usec
= USEC_INFINITY
;
1732 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1733 s
->sync_scheduled
= false;
1735 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1736 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1738 s
->forward_to_wall
= true;
1740 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1742 s
->max_level_store
= LOG_DEBUG
;
1743 s
->max_level_syslog
= LOG_DEBUG
;
1744 s
->max_level_kmsg
= LOG_NOTICE
;
1745 s
->max_level_console
= LOG_INFO
;
1746 s
->max_level_wall
= LOG_EMERG
;
1748 journal_reset_metrics(&s
->system_metrics
);
1749 journal_reset_metrics(&s
->runtime_metrics
);
1751 server_parse_config_file(s
);
1752 server_parse_proc_cmdline(s
);
1754 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1755 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1756 s
->rate_limit_interval
, s
->rate_limit_burst
);
1757 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1760 (void) mkdir_p("/run/systemd/journal", 0755);
1762 s
->user_journals
= ordered_hashmap_new(NULL
);
1763 if (!s
->user_journals
)
1766 s
->mmap
= mmap_cache_new();
1770 r
= sd_event_default(&s
->event
);
1772 return log_error_errno(r
, "Failed to create event loop: %m");
1774 n
= sd_listen_fds(true);
1776 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1778 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1780 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1782 if (s
->native_fd
>= 0) {
1783 log_error("Too many native sockets passed.");
1789 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1791 if (s
->stdout_fd
>= 0) {
1792 log_error("Too many stdout sockets passed.");
1798 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1799 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1801 if (s
->syslog_fd
>= 0) {
1802 log_error("Too many /dev/log sockets passed.");
1808 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1810 if (s
->audit_fd
>= 0) {
1811 log_error("Too many audit sockets passed.");
1825 r
= fdset_put(fds
, fd
);
1831 /* Try to restore streams, but don't bother if this fails */
1832 (void) server_restore_streams(s
, fds
);
1834 if (fdset_size(fds
) > 0) {
1835 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1836 fds
= fdset_free(fds
);
1839 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1841 /* always open stdout, syslog, native, and kmsg sockets */
1843 /* systemd-journald.socket: /run/systemd/journal/stdout */
1844 r
= server_open_stdout_socket(s
);
1848 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1849 r
= server_open_syslog_socket(s
);
1853 /* systemd-journald.socket: /run/systemd/journal/socket */
1854 r
= server_open_native_socket(s
);
1859 r
= server_open_dev_kmsg(s
);
1863 /* Unless we got *some* sockets and not audit, open audit socket */
1864 if (s
->audit_fd
>= 0 || no_sockets
) {
1865 r
= server_open_audit(s
);
1870 r
= server_open_kernel_seqnum(s
);
1874 r
= server_open_hostname(s
);
1878 r
= setup_signals(s
);
1882 s
->udev
= udev_new();
1886 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1890 r
= cg_get_root_path(&s
->cgroup_root
);
1894 server_cache_hostname(s
);
1895 server_cache_boot_id(s
);
1896 server_cache_machine_id(s
);
1898 (void) server_connect_notify(s
);
1900 return system_journal_open(s
, false);
/* Takes the current CLOCK_REALTIME time once and passes it to
 * journal_file_maybe_append_tag() for the system journal (if there is one)
 * and for every journal file in s->user_journals.
 *
 * NOTE(review): this listing skips original lines 1904-1908 and everything
 * after 1915, so the declarations of f, i and n, and anything else that
 * may surround the body, are not visible -- confirm against the complete
 * file. */
1903 void server_maybe_append_tags(Server
*s
) {
1909 n
= now(CLOCK_REALTIME
);
1911 if (s
->system_journal
)
1912 journal_file_maybe_append_tag(s
->system_journal
, n
);
1914 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1915 journal_file_maybe_append_tag(f
, n
);
/* Releases what the server holds. In the order visible here:
 *
 *  - frees every stdout stream while s->stdout_streams is non-NULL;
 *  - closes the system and runtime journals if open, then every journal
 *    file taken out of s->user_journals, and frees that hashmap;
 *  - drops the references to all event sources and to the event loop;
 *  - closes the syslog, native, stdout, kmsg, audit, hostname and notify
 *    fds via safe_close();
 *  - frees the rate limiter, unmaps the kernel sequence number mapping
 *    (sizeof(uint64_t) bytes) if present, frees the cgroup root and
 *    hostname field strings, and drops the mmap cache and udev references.
 *
 * NOTE(review): this listing skips original lines (embedded numbering
 * 1919 -> 1923, 1959 -> 1962, 1965 -> 1969, 1970 -> 1973). The declaration
 * of f and any guards or additional statements on those lines are not
 * visible -- confirm against the complete file. */
1919 void server_done(Server
*s
) {
1923 while (s
->stdout_streams
)
1924 stdout_stream_free(s
->stdout_streams
);
1926 if (s
->system_journal
)
1927 journal_file_close(s
->system_journal
);
1929 if (s
->runtime_journal
)
1930 journal_file_close(s
->runtime_journal
);
1932 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1933 journal_file_close(f
);
1935 ordered_hashmap_free(s
->user_journals
);
1937 sd_event_source_unref(s
->syslog_event_source
);
1938 sd_event_source_unref(s
->native_event_source
);
1939 sd_event_source_unref(s
->stdout_event_source
);
1940 sd_event_source_unref(s
->dev_kmsg_event_source
);
1941 sd_event_source_unref(s
->audit_event_source
);
1942 sd_event_source_unref(s
->sync_event_source
);
1943 sd_event_source_unref(s
->sigusr1_event_source
);
1944 sd_event_source_unref(s
->sigusr2_event_source
);
1945 sd_event_source_unref(s
->sigterm_event_source
);
1946 sd_event_source_unref(s
->sigint_event_source
);
1947 sd_event_source_unref(s
->sigrtmin1_event_source
);
1948 sd_event_source_unref(s
->hostname_event_source
);
1949 sd_event_source_unref(s
->notify_event_source
);
1950 sd_event_source_unref(s
->watchdog_event_source
);
1951 sd_event_unref(s
->event
);
1953 safe_close(s
->syslog_fd
);
1954 safe_close(s
->native_fd
);
1955 safe_close(s
->stdout_fd
);
1956 safe_close(s
->dev_kmsg_fd
);
1957 safe_close(s
->audit_fd
);
1958 safe_close(s
->hostname_fd
);
1959 safe_close(s
->notify_fd
);
1962 journal_rate_limit_free(s
->rate_limit
);
1964 if (s
->kernel_seqnum
)
1965 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1969 free(s
->cgroup_root
);
1970 free(s
->hostname_field
);
1973 mmap_cache_unref(s
->mmap
);
1975 udev_unref(s
->udev
);
1978 static const char* const storage_table
[_STORAGE_MAX
] = {
1979 [STORAGE_AUTO
] = "auto",
1980 [STORAGE_VOLATILE
] = "volatile",
1981 [STORAGE_PERSISTENT
] = "persistent",
1982 [STORAGE_NONE
] = "none"
1985 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1986 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
1988 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1989 [SPLIT_LOGIN
] = "login",
1990 [SPLIT_UID
] = "uid",
1991 [SPLIT_NONE
] = "none",
1994 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1995 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");