2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "formats-util.h"
46 #include "hostname-util.h"
48 #include "journal-authenticate.h"
49 #include "journal-file.h"
50 #include "journal-internal.h"
51 #include "journal-vacuum.h"
52 #include "journald-audit.h"
53 #include "journald-kmsg.h"
54 #include "journald-native.h"
55 #include "journald-rate-limit.h"
56 #include "journald-server.h"
57 #include "journald-stream.h"
58 #include "journald-syslog.h"
61 #include "parse-util.h"
62 #include "proc-cmdline.h"
63 #include "process-util.h"
65 #include "selinux-util.h"
66 #include "signal-util.h"
67 #include "socket-util.h"
68 #include "stdio-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
71 #include "user-util.h"
74 #define USER_JOURNALS_MAX 1024
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85 /* The period to insert between posting changes for coalescing */
86 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88 static int determine_space_for(
90 JournalMetrics
*metrics
,
98 uint64_t sum
= 0, ss_avail
, avail
;
99 _cleanup_closedir_
DIR *d
= NULL
;
110 ts
= now(CLOCK_MONOTONIC
);
112 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
115 *available
= s
->cached_space_available
;
117 *limit
= s
->cached_space_limit
;
122 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
125 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
127 if (fstatvfs(dirfd(d
), &ss
) < 0)
128 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
130 FOREACH_DIRENT_ALL(de
, d
, break) {
133 if (!endswith(de
->d_name
, ".journal") &&
134 !endswith(de
->d_name
, ".journal~"))
137 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
138 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
142 if (!S_ISREG(st
.st_mode
))
145 sum
+= (uint64_t) st
.st_blocks
* 512UL;
148 /* If requested, then let's bump the min_use limit to the
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
156 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
158 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
159 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
161 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
162 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
163 s
->cached_space_timestamp
= ts
;
166 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
167 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
168 format_bytes(fb1
, sizeof(fb1
), sum
);
169 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
170 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
171 format_bytes(fb4
, sizeof(fb4
), ss_avail
);
172 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
);
173 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
);
175 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
176 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
177 name
, path
, fb1
, fb5
, fb6
),
178 "JOURNAL_NAME=%s", name
,
179 "JOURNAL_PATH=%s", path
,
180 "CURRENT_USE=%"PRIu64
, sum
,
181 "CURRENT_USE_PRETTY=%s", fb1
,
182 "MAX_USE=%"PRIu64
, metrics
->max_use
,
183 "MAX_USE_PRETTY=%s", fb2
,
184 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
185 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
186 "DISK_AVAILABLE=%"PRIu64
, ss_avail
,
187 "DISK_AVAILABLE_PRETTY=%s", fb4
,
188 "LIMIT=%"PRIu64
, s
->cached_space_limit
,
189 "LIMIT_PRETTY=%s", fb5
,
190 "AVAILABLE=%"PRIu64
, s
->cached_space_available
,
191 "AVAILABLE_PRETTY=%s", fb6
,
196 *available
= s
->cached_space_available
;
198 *limit
= s
->cached_space_limit
;
203 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
204 JournalMetrics
*metrics
;
205 const char *path
, *name
;
209 if (s
->system_journal
) {
210 path
= "/var/log/journal/";
211 metrics
= &s
->system_metrics
;
212 name
= "System journal";
214 path
= "/run/log/journal/";
215 metrics
= &s
->runtime_metrics
;
216 name
= "Runtime journal";
219 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
222 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
229 if (uid
<= SYSTEM_UID_MAX
)
232 r
= add_acls_for_user(f
->fd
, uid
);
234 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
238 static int open_journal(
244 JournalMetrics
*metrics
,
254 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
256 r
= journal_file_open(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
260 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
262 (void) journal_file_close(f
);
270 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
271 _cleanup_free_
char *p
= NULL
;
278 /* We split up user logs only on /var, not on /run. If the
279 * runtime file is open, we write to it exclusively, in order
280 * to guarantee proper order as soon as we flush /run to
281 * /var and close the runtime file. */
283 if (s
->runtime_journal
)
284 return s
->runtime_journal
;
286 if (uid
<= SYSTEM_UID_MAX
)
287 return s
->system_journal
;
289 r
= sd_id128_get_machine(&machine
);
291 return s
->system_journal
;
293 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
297 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
298 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
299 return s
->system_journal
;
301 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
302 /* Too many open? Then let's close one */
303 f
= ordered_hashmap_steal_first(s
->user_journals
);
305 (void) journal_file_close(f
);
308 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &f
);
310 return s
->system_journal
;
312 server_add_acls(f
, uid
);
314 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
316 (void) journal_file_close(f
);
317 return s
->system_journal
;
323 static int do_rotate(
336 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
339 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
341 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
343 server_add_acls(*f
, uid
);
348 void server_rotate(Server
*s
) {
354 log_debug("Rotating...");
356 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
357 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
359 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
360 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
362 ordered_hashmap_replace(s
->user_journals
, k
, f
);
364 /* Old file has been closed and deallocated */
365 ordered_hashmap_remove(s
->user_journals
, k
);
368 /* Perform any deferred closes which aren't still offlining. */
369 SET_FOREACH(f
, s
->deferred_closes
, i
)
370 if (!journal_file_is_offlining(f
)) {
371 (void) set_remove(s
->deferred_closes
, f
);
372 (void) journal_file_close(f
);
376 void server_sync(Server
*s
) {
381 if (s
->system_journal
) {
382 r
= journal_file_set_offline(s
->system_journal
, false);
384 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
387 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
388 r
= journal_file_set_offline(f
, false);
390 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
393 if (s
->sync_event_source
) {
394 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
396 log_error_errno(r
, "Failed to disable sync timer source: %m");
399 s
->sync_scheduled
= false;
402 static void do_vacuum(
405 JournalMetrics
*metrics
,
409 bool patch_min_use
) {
423 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
425 limit
= metrics
->max_use
;
426 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
428 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
429 if (r
< 0 && r
!= -ENOENT
)
430 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
433 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
436 log_debug("Vacuuming...");
438 s
->oldest_file_usec
= 0;
440 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
441 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
443 s
->cached_space_limit
= 0;
444 s
->cached_space_available
= 0;
445 s
->cached_space_timestamp
= 0;
450 static void server_cache_machine_id(Server
*s
) {
456 r
= sd_id128_get_machine(&id
);
460 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
463 static void server_cache_boot_id(Server
*s
) {
469 r
= sd_id128_get_boot(&id
);
473 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
476 static void server_cache_hostname(Server
*s
) {
477 _cleanup_free_
char *t
= NULL
;
482 t
= gethostname_malloc();
486 x
= strappend("_HOSTNAME=", t
);
490 free(s
->hostname_field
);
491 s
->hostname_field
= x
;
494 static bool shall_try_append_again(JournalFile
*f
, int r
) {
496 /* -E2BIG Hit configured limit
498 -EDQUOT Quota limit hit
500 -EIO I/O error of some kind (mmap)
501 -EHOSTDOWN Other machine
502 -EBUSY Unclean shutdown
503 -EPROTONOSUPPORT Unsupported feature
506 -ESHUTDOWN Already archived
507 -EIDRM Journal file has been deleted */
509 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
510 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
511 else if (r
== -EHOSTDOWN
)
512 log_info("%s: Journal file from other machine, rotating.", f
->path
);
513 else if (r
== -EBUSY
)
514 log_info("%s: Unclean shutdown, rotating.", f
->path
);
515 else if (r
== -EPROTONOSUPPORT
)
516 log_info("%s: Unsupported feature, rotating.", f
->path
);
517 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
518 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
520 log_warning("%s: IO error, rotating.", f
->path
);
521 else if (r
== -EIDRM
)
522 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
529 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
531 bool vacuumed
= false;
538 f
= find_journal(s
, uid
);
542 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
543 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
545 server_vacuum(s
, false, false);
548 f
= find_journal(s
, uid
);
553 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
555 server_schedule_sync(s
, priority
);
559 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
560 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
565 server_vacuum(s
, false, false);
567 f
= find_journal(s
, uid
);
571 log_debug("Retrying write.");
572 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
574 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
576 server_schedule_sync(s
, priority
);
579 static void dispatch_message_real(
581 struct iovec
*iovec
, unsigned n
, unsigned m
,
582 const struct ucred
*ucred
,
583 const struct timeval
*tv
,
584 const char *label
, size_t label_len
,
589 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
590 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
591 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
592 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
593 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
594 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
595 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
596 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
602 uid_t realuid
= 0, owner
= 0, journal_uid
;
603 bool owner_valid
= false;
605 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
606 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
607 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
608 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
617 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
620 realuid
= ucred
->uid
;
622 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
623 IOVEC_SET_STRING(iovec
[n
++], pid
);
625 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
626 IOVEC_SET_STRING(iovec
[n
++], uid
);
628 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
629 IOVEC_SET_STRING(iovec
[n
++], gid
);
631 r
= get_process_comm(ucred
->pid
, &t
);
633 x
= strjoina("_COMM=", t
);
635 IOVEC_SET_STRING(iovec
[n
++], x
);
638 r
= get_process_exe(ucred
->pid
, &t
);
640 x
= strjoina("_EXE=", t
);
642 IOVEC_SET_STRING(iovec
[n
++], x
);
645 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
647 x
= strjoina("_CMDLINE=", t
);
649 IOVEC_SET_STRING(iovec
[n
++], x
);
652 r
= get_process_capeff(ucred
->pid
, &t
);
654 x
= strjoina("_CAP_EFFECTIVE=", t
);
656 IOVEC_SET_STRING(iovec
[n
++], x
);
660 r
= audit_session_from_pid(ucred
->pid
, &audit
);
662 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
663 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
666 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
668 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
669 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
673 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
675 char *session
= NULL
;
677 x
= strjoina("_SYSTEMD_CGROUP=", c
);
678 IOVEC_SET_STRING(iovec
[n
++], x
);
680 r
= cg_path_get_session(c
, &t
);
682 session
= strjoina("_SYSTEMD_SESSION=", t
);
684 IOVEC_SET_STRING(iovec
[n
++], session
);
687 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
690 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
691 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
694 if (cg_path_get_unit(c
, &t
) >= 0) {
695 x
= strjoina("_SYSTEMD_UNIT=", t
);
697 IOVEC_SET_STRING(iovec
[n
++], x
);
698 } else if (unit_id
&& !session
) {
699 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
700 IOVEC_SET_STRING(iovec
[n
++], x
);
703 if (cg_path_get_user_unit(c
, &t
) >= 0) {
704 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
706 IOVEC_SET_STRING(iovec
[n
++], x
);
707 } else if (unit_id
&& session
) {
708 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
709 IOVEC_SET_STRING(iovec
[n
++], x
);
712 if (cg_path_get_slice(c
, &t
) >= 0) {
713 x
= strjoina("_SYSTEMD_SLICE=", t
);
715 IOVEC_SET_STRING(iovec
[n
++], x
);
719 } else if (unit_id
) {
720 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
721 IOVEC_SET_STRING(iovec
[n
++], x
);
725 if (mac_selinux_have()) {
727 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
729 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
730 IOVEC_SET_STRING(iovec
[n
++], x
);
732 security_context_t con
;
734 if (getpidcon(ucred
->pid
, &con
) >= 0) {
735 x
= strjoina("_SELINUX_CONTEXT=", con
);
738 IOVEC_SET_STRING(iovec
[n
++], x
);
747 r
= get_process_uid(object_pid
, &object_uid
);
749 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
750 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
753 r
= get_process_gid(object_pid
, &object_gid
);
755 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
756 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
759 r
= get_process_comm(object_pid
, &t
);
761 x
= strjoina("OBJECT_COMM=", t
);
763 IOVEC_SET_STRING(iovec
[n
++], x
);
766 r
= get_process_exe(object_pid
, &t
);
768 x
= strjoina("OBJECT_EXE=", t
);
770 IOVEC_SET_STRING(iovec
[n
++], x
);
773 r
= get_process_cmdline(object_pid
, 0, false, &t
);
775 x
= strjoina("OBJECT_CMDLINE=", t
);
777 IOVEC_SET_STRING(iovec
[n
++], x
);
781 r
= audit_session_from_pid(object_pid
, &audit
);
783 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
784 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
787 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
789 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
790 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
794 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
796 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
797 IOVEC_SET_STRING(iovec
[n
++], x
);
799 r
= cg_path_get_session(c
, &t
);
801 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
803 IOVEC_SET_STRING(iovec
[n
++], x
);
806 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
807 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
808 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
811 if (cg_path_get_unit(c
, &t
) >= 0) {
812 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
814 IOVEC_SET_STRING(iovec
[n
++], x
);
817 if (cg_path_get_user_unit(c
, &t
) >= 0) {
818 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
820 IOVEC_SET_STRING(iovec
[n
++], x
);
829 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
830 IOVEC_SET_STRING(iovec
[n
++], source_time
);
833 /* Note that strictly speaking storing the boot id here is
834 * redundant since the entry includes this in-line
835 * anyway. However, we need this indexed, too. */
836 if (!isempty(s
->boot_id_field
))
837 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
839 if (!isempty(s
->machine_id_field
))
840 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
842 if (!isempty(s
->hostname_field
))
843 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
847 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
848 /* Split up strictly by any UID */
849 journal_uid
= realuid
;
850 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
851 /* Split up by login UIDs. We do this only if the
852 * realuid is not root, in order not to accidentally
853 * leak privileged information to the user that is
854 * logged by a privileged process that is part of an
855 * unprivileged session. */
860 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
863 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
864 char mid
[11 + 32 + 1];
865 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
869 struct ucred ucred
= {};
874 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
875 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
876 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
878 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
879 assert_cc(6 == LOG_INFO
);
880 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
882 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
883 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
884 IOVEC_SET_STRING(iovec
[n
++], mid
);
889 va_start(ap
, format
);
890 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
891 /* Error handling below */
894 ucred
.pid
= getpid();
895 ucred
.uid
= getuid();
896 ucred
.gid
= getgid();
899 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
902 free(iovec
[m
++].iov_base
);
905 /* We failed to format the message. Emit a warning instead. */
908 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
911 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
912 IOVEC_SET_STRING(iovec
[n
++], buf
);
913 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
917 void server_dispatch_message(
919 struct iovec
*iovec
, unsigned n
, unsigned m
,
920 const struct ucred
*ucred
,
921 const struct timeval
*tv
,
922 const char *label
, size_t label_len
,
928 _cleanup_free_
char *path
= NULL
;
929 uint64_t available
= 0;
933 assert(iovec
|| n
== 0);
938 if (LOG_PRI(priority
) > s
->max_level_store
)
941 /* Stop early in case the information will not be stored
943 if (s
->storage
== STORAGE_NONE
)
949 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
953 /* example: /user/lennart/3/foobar
954 * /system/dbus.service/foobar
956 * So let's cut off everything past the third /, since that is
957 * where user directories start */
959 c
= strchr(path
, '/');
961 c
= strchr(c
+1, '/');
963 c
= strchr(c
+1, '/');
969 (void) determine_space(s
, false, false, &available
, NULL
);
970 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
974 /* Write a suppression message if we suppressed something */
976 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
977 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
981 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
985 static int system_journal_open(Server
*s
, bool flush_requested
) {
989 if (!s
->system_journal
&&
990 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
992 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
994 /* If in auto mode: first try to create the machine
995 * path, but not the prefix.
997 * If in persistent mode: create /var/log/journal and
998 * the machine path */
1000 if (s
->storage
== STORAGE_PERSISTENT
)
1001 (void) mkdir_p("/var/log/journal/", 0755);
1003 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
1004 (void) mkdir(fn
, 0755);
1006 fn
= strjoina(fn
, "/system.journal");
1007 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &s
->system_journal
);
1009 server_add_acls(s
->system_journal
, 0);
1010 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
1012 if (r
!= -ENOENT
&& r
!= -EROFS
)
1013 log_warning_errno(r
, "Failed to open system journal: %m");
1019 if (!s
->runtime_journal
&&
1020 (s
->storage
!= STORAGE_NONE
)) {
1022 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
1024 if (s
->system_journal
) {
1026 /* Try to open the runtime journal, but only
1027 * if it already exists, so that we can flush
1028 * it into the system journal */
1030 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1033 log_warning_errno(r
, "Failed to open runtime journal: %m");
1040 /* OK, we really need the runtime journal, so create
1041 * it if necessary. */
1043 (void) mkdir("/run/log", 0755);
1044 (void) mkdir("/run/log/journal", 0755);
1045 (void) mkdir_parents(fn
, 0750);
1047 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1049 return log_error_errno(r
, "Failed to open runtime journal: %m");
1052 if (s
->runtime_journal
) {
1053 server_add_acls(s
->runtime_journal
, 0);
1054 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1061 int server_flush_to_var(Server
*s
) {
1063 sd_journal
*j
= NULL
;
1064 char ts
[FORMAT_TIMESPAN_MAX
];
1071 if (s
->storage
!= STORAGE_AUTO
&&
1072 s
->storage
!= STORAGE_PERSISTENT
)
1075 if (!s
->runtime_journal
)
1078 (void) system_journal_open(s
, true);
1080 if (!s
->system_journal
)
1083 log_debug("Flushing to /var...");
1085 start
= now(CLOCK_MONOTONIC
);
1087 r
= sd_id128_get_machine(&machine
);
1091 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1093 return log_error_errno(r
, "Failed to read runtime journal: %m");
1095 sd_journal_set_data_threshold(j
, 0);
1097 SD_JOURNAL_FOREACH(j
) {
1101 f
= j
->current_file
;
1102 assert(f
&& f
->current_offset
> 0);
1106 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1108 log_error_errno(r
, "Can't read entry: %m");
1112 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1116 if (!shall_try_append_again(s
->system_journal
, r
)) {
1117 log_error_errno(r
, "Can't write entry: %m");
1122 server_vacuum(s
, false, false);
1124 if (!s
->system_journal
) {
1125 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1130 log_debug("Retrying write.");
1131 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1133 log_error_errno(r
, "Can't write entry: %m");
1141 journal_file_post_change(s
->system_journal
);
1143 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1146 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1148 sd_journal_close(j
);
1150 server_driver_message(s
, SD_ID128_NULL
,
1151 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1152 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1159 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1160 Server
*s
= userdata
;
1161 struct ucred
*ucred
= NULL
;
1162 struct timeval
*tv
= NULL
;
1163 struct cmsghdr
*cmsg
;
1165 size_t label_len
= 0, m
;
1168 int *fds
= NULL
, v
= 0;
1172 struct cmsghdr cmsghdr
;
1174 /* We use NAME_MAX space for the SELinux label
1175 * here. The kernel currently enforces no
1176 * limit, but according to suggestions from
1177 * the SELinux people this will change and it
1178 * will probably be identical to NAME_MAX. For
1179 * now we use that, but this should be updated
1180 * one day when the final limit is known. */
1181 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1182 CMSG_SPACE(sizeof(struct timeval
)) +
1183 CMSG_SPACE(sizeof(int)) + /* fd */
1184 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1187 union sockaddr_union sa
= {};
1189 struct msghdr msghdr
= {
1192 .msg_control
= &control
,
1193 .msg_controllen
= sizeof(control
),
1195 .msg_namelen
= sizeof(sa
),
1199 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1201 if (revents
!= EPOLLIN
) {
1202 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1206 /* Try to get the right size, if we can. (Not all
1207 * sockets support SIOCINQ, hence we just try, but
1208 * don't rely on it.) */
1209 (void) ioctl(fd
, SIOCINQ
, &v
);
1211 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1212 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1214 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1216 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1219 iovec
.iov_base
= s
->buffer
;
1220 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1222 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1224 if (errno
== EINTR
|| errno
== EAGAIN
)
1227 return log_error_errno(errno
, "recvmsg() failed: %m");
1230 CMSG_FOREACH(cmsg
, &msghdr
) {
1232 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1233 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1234 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1235 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1236 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1237 cmsg
->cmsg_type
== SCM_SECURITY
) {
1238 label
= (char*) CMSG_DATA(cmsg
);
1239 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1240 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1241 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1242 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1243 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1244 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1245 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1246 fds
= (int*) CMSG_DATA(cmsg
);
1247 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1251 /* And a trailing NUL, just in case */
1254 if (fd
== s
->syslog_fd
) {
1255 if (n
> 0 && n_fds
== 0)
1256 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1258 log_warning("Got file descriptors via syslog socket. Ignoring.");
1260 } else if (fd
== s
->native_fd
) {
1261 if (n
> 0 && n_fds
== 0)
1262 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1263 else if (n
== 0 && n_fds
== 1)
1264 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1266 log_warning("Got too many file descriptors via native socket. Ignoring.");
1269 assert(fd
== s
->audit_fd
);
1271 if (n
> 0 && n_fds
== 0)
1272 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1274 log_warning("Got file descriptors via audit socket. Ignoring.");
1277 close_many(fds
, n_fds
);
1281 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1282 Server
*s
= userdata
;
1287 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1289 server_flush_to_var(s
);
1291 server_vacuum(s
, false, false);
1293 r
= touch("/run/systemd/journal/flushed");
1295 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1300 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1301 Server
*s
= userdata
;
1306 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1308 server_vacuum(s
, true, true);
1310 /* Let clients know when the most recent rotation happened. */
1311 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1313 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1318 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1319 Server
*s
= userdata
;
1323 log_received_signal(LOG_INFO
, si
);
1325 sd_event_exit(s
->event
, 0);
1329 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1330 Server
*s
= userdata
;
1335 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1339 /* Let clients know when the most recent sync happened. */
1340 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1342 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1347 static int setup_signals(Server
*s
) {
1352 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1354 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1358 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1362 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1366 /* Let's process SIGTERM late, so that we flush all queued
1367 * messages to disk before we exit */
1368 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1372 /* When journald is invoked on the terminal (when debugging),
1373 * it's useful if C-c is handled equivalent to SIGTERM. */
1374 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1378 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1382 /* SIGRTMIN+1 causes an immediate sync. We process this very
1383 * late, so that everything else queued at this point is
1384 * really written to disk. Clients can watch
1385 * /run/systemd/journal/synced with inotify until its mtime
1386 * changes to see when a sync happened. */
1387 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1391 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1398 static int server_parse_proc_cmdline(Server
*s
) {
1399 _cleanup_free_
char *line
= NULL
;
1403 r
= proc_cmdline(&line
);
1405 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1411 _cleanup_free_
char *word
= NULL
;
1413 r
= extract_first_word(&p
, &word
, NULL
, 0);
1415 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1420 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1421 r
= parse_boolean(word
+ 35);
1423 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1425 s
->forward_to_syslog
= r
;
1426 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1427 r
= parse_boolean(word
+ 33);
1429 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1431 s
->forward_to_kmsg
= r
;
1432 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1433 r
= parse_boolean(word
+ 36);
1435 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1437 s
->forward_to_console
= r
;
1438 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1439 r
= parse_boolean(word
+ 33);
1441 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1443 s
->forward_to_wall
= r
;
1444 } else if (startswith(word
, "systemd.journald"))
1445 log_warning("Invalid systemd.journald parameter. Ignoring.");
1448 /* do not warn about state here, since probably systemd already did */
1452 static int server_parse_config_file(Server
*s
) {
1455 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1456 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1458 config_item_perf_lookup
, journald_gperf_lookup
,
1462 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1463 Server
*s
= userdata
;
1471 int server_schedule_sync(Server
*s
, int priority
) {
1476 if (priority
<= LOG_CRIT
) {
1477 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1482 if (s
->sync_scheduled
)
1485 if (s
->sync_interval_usec
> 0) {
1488 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1492 when
+= s
->sync_interval_usec
;
1494 if (!s
->sync_event_source
) {
1495 r
= sd_event_add_time(
1497 &s
->sync_event_source
,
1500 server_dispatch_sync
, s
);
1504 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1506 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1510 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1515 s
->sync_scheduled
= true;
1521 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1522 Server
*s
= userdata
;
1526 server_cache_hostname(s
);
1530 static int server_open_hostname(Server
*s
) {
1535 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1536 if (s
->hostname_fd
< 0)
1537 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1539 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1541 /* kernels prior to 3.2 don't support polling this file. Ignore
1544 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1545 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1549 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1552 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1554 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1559 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1560 Server
*s
= userdata
;
1564 assert(s
->notify_event_source
== es
);
1565 assert(s
->notify_fd
== fd
);
1567 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1568 * message on it. Either it's the wtachdog event, the initial
1569 * READY=1 event or an stdout stream event. If there's nothing
1570 * to write anymore, turn our event source off. The next time
1571 * there's something to send it will be turned on again. */
1573 if (!s
->sent_notify_ready
) {
1574 static const char p
[] =
1576 "STATUS=Processing requests...";
1579 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1581 if (errno
== EAGAIN
)
1584 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1587 s
->sent_notify_ready
= true;
1588 log_debug("Sent READY=1 notification.");
1590 } else if (s
->send_watchdog
) {
1592 static const char p
[] =
1597 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1599 if (errno
== EAGAIN
)
1602 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1605 s
->send_watchdog
= false;
1606 log_debug("Sent WATCHDOG=1 notification.");
1608 } else if (s
->stdout_streams_notify_queue
)
1609 /* Dispatch one stream notification event */
1610 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1612 /* Leave us enabled if there's still more to to do. */
1613 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1616 /* There was nothing to do anymore, let's turn ourselves off. */
1617 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1619 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1624 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1625 Server
*s
= userdata
;
1630 s
->send_watchdog
= true;
1632 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1634 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1636 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1638 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1640 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1642 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1647 static int server_connect_notify(Server
*s
) {
1648 union sockaddr_union sa
= {
1649 .un
.sun_family
= AF_UNIX
,
1655 assert(s
->notify_fd
< 0);
1656 assert(!s
->notify_event_source
);
1659 So here's the problem: we'd like to send notification
1660 messages to PID 1, but we cannot do that via sd_notify(),
1661 since that's synchronous, and we might end up blocking on
1662 it. Specifically: given that PID 1 might block on
1663 dbus-daemon during IPC, and dbus-daemon is logging to us,
1664 and might hence block on us, we might end up in a deadlock
1665 if we block on sending PID 1 notification messages -- by
1666 generating a full blocking circle. To avoid this, let's
1667 create a non-blocking socket, and connect it to the
1668 notification socket, and then wait for POLLOUT before we
1669 send anything. This should efficiently avoid any deadlocks,
1670 as we'll never block on PID 1, hence PID 1 can safely block
1671 on dbus-daemon which can safely block on us again.
1673 Don't think that this issue is real? It is, see:
1674 https://github.com/systemd/systemd/issues/1505
1677 e
= getenv("NOTIFY_SOCKET");
1681 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1682 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1686 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1687 log_error("NOTIFY_SOCKET path too long: %s", e
);
1691 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1692 if (s
->notify_fd
< 0)
1693 return log_error_errno(errno
, "Failed to create notify socket: %m");
1695 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1697 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1698 if (sa
.un
.sun_path
[0] == '@')
1699 sa
.un
.sun_path
[0] = 0;
1701 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1703 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1705 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1707 return log_error_errno(r
, "Failed to watch notification socket: %m");
1709 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1710 s
->send_watchdog
= true;
1712 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1714 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1717 /* This should fire pretty soon, which we'll use to send the
1723 int server_init(Server
*s
) {
1724 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1731 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1735 s
->watchdog_usec
= USEC_INFINITY
;
1737 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1738 s
->sync_scheduled
= false;
1740 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1741 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1743 s
->forward_to_wall
= true;
1745 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1747 s
->max_level_store
= LOG_DEBUG
;
1748 s
->max_level_syslog
= LOG_DEBUG
;
1749 s
->max_level_kmsg
= LOG_NOTICE
;
1750 s
->max_level_console
= LOG_INFO
;
1751 s
->max_level_wall
= LOG_EMERG
;
1753 journal_reset_metrics(&s
->system_metrics
);
1754 journal_reset_metrics(&s
->runtime_metrics
);
1756 server_parse_config_file(s
);
1757 server_parse_proc_cmdline(s
);
1759 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1760 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1761 s
->rate_limit_interval
, s
->rate_limit_burst
);
1762 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1765 (void) mkdir_p("/run/systemd/journal", 0755);
1767 s
->user_journals
= ordered_hashmap_new(NULL
);
1768 if (!s
->user_journals
)
1771 s
->mmap
= mmap_cache_new();
1775 s
->deferred_closes
= set_new(NULL
);
1776 if (!s
->deferred_closes
)
1779 r
= sd_event_default(&s
->event
);
1781 return log_error_errno(r
, "Failed to create event loop: %m");
1783 n
= sd_listen_fds(true);
1785 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1787 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1789 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1791 if (s
->native_fd
>= 0) {
1792 log_error("Too many native sockets passed.");
1798 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1800 if (s
->stdout_fd
>= 0) {
1801 log_error("Too many stdout sockets passed.");
1807 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1808 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1810 if (s
->syslog_fd
>= 0) {
1811 log_error("Too many /dev/log sockets passed.");
1817 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1819 if (s
->audit_fd
>= 0) {
1820 log_error("Too many audit sockets passed.");
1834 r
= fdset_put(fds
, fd
);
1840 /* Try to restore streams, but don't bother if this fails */
1841 (void) server_restore_streams(s
, fds
);
1843 if (fdset_size(fds
) > 0) {
1844 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1845 fds
= fdset_free(fds
);
1848 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1850 /* always open stdout, syslog, native, and kmsg sockets */
1852 /* systemd-journald.socket: /run/systemd/journal/stdout */
1853 r
= server_open_stdout_socket(s
);
1857 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1858 r
= server_open_syslog_socket(s
);
1862 /* systemd-journald.socket: /run/systemd/journal/socket */
1863 r
= server_open_native_socket(s
);
1868 r
= server_open_dev_kmsg(s
);
1872 /* Unless we got *some* sockets and not audit, open audit socket */
1873 if (s
->audit_fd
>= 0 || no_sockets
) {
1874 r
= server_open_audit(s
);
1879 r
= server_open_kernel_seqnum(s
);
1883 r
= server_open_hostname(s
);
1887 r
= setup_signals(s
);
1891 s
->udev
= udev_new();
1895 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1899 r
= cg_get_root_path(&s
->cgroup_root
);
1903 server_cache_hostname(s
);
1904 server_cache_boot_id(s
);
1905 server_cache_machine_id(s
);
1907 (void) server_connect_notify(s
);
1909 return system_journal_open(s
, false);
1912 void server_maybe_append_tags(Server
*s
) {
1918 n
= now(CLOCK_REALTIME
);
1920 if (s
->system_journal
)
1921 journal_file_maybe_append_tag(s
->system_journal
, n
);
1923 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1924 journal_file_maybe_append_tag(f
, n
);
1928 void server_done(Server
*s
) {
1932 if (s
->deferred_closes
) {
1933 journal_file_close_set(s
->deferred_closes
);
1934 set_free(s
->deferred_closes
);
1937 while (s
->stdout_streams
)
1938 stdout_stream_free(s
->stdout_streams
);
1940 if (s
->system_journal
)
1941 (void) journal_file_close(s
->system_journal
);
1943 if (s
->runtime_journal
)
1944 (void) journal_file_close(s
->runtime_journal
);
1946 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1947 (void) journal_file_close(f
);
1949 ordered_hashmap_free(s
->user_journals
);
1951 sd_event_source_unref(s
->syslog_event_source
);
1952 sd_event_source_unref(s
->native_event_source
);
1953 sd_event_source_unref(s
->stdout_event_source
);
1954 sd_event_source_unref(s
->dev_kmsg_event_source
);
1955 sd_event_source_unref(s
->audit_event_source
);
1956 sd_event_source_unref(s
->sync_event_source
);
1957 sd_event_source_unref(s
->sigusr1_event_source
);
1958 sd_event_source_unref(s
->sigusr2_event_source
);
1959 sd_event_source_unref(s
->sigterm_event_source
);
1960 sd_event_source_unref(s
->sigint_event_source
);
1961 sd_event_source_unref(s
->sigrtmin1_event_source
);
1962 sd_event_source_unref(s
->hostname_event_source
);
1963 sd_event_source_unref(s
->notify_event_source
);
1964 sd_event_source_unref(s
->watchdog_event_source
);
1965 sd_event_unref(s
->event
);
1967 safe_close(s
->syslog_fd
);
1968 safe_close(s
->native_fd
);
1969 safe_close(s
->stdout_fd
);
1970 safe_close(s
->dev_kmsg_fd
);
1971 safe_close(s
->audit_fd
);
1972 safe_close(s
->hostname_fd
);
1973 safe_close(s
->notify_fd
);
1976 journal_rate_limit_free(s
->rate_limit
);
1978 if (s
->kernel_seqnum
)
1979 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1983 free(s
->cgroup_root
);
1984 free(s
->hostname_field
);
1987 mmap_cache_unref(s
->mmap
);
1989 udev_unref(s
->udev
);
1992 static const char* const storage_table
[_STORAGE_MAX
] = {
1993 [STORAGE_AUTO
] = "auto",
1994 [STORAGE_VOLATILE
] = "volatile",
1995 [STORAGE_PERSISTENT
] = "persistent",
1996 [STORAGE_NONE
] = "none"
1999 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2000 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
2002 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2003 [SPLIT_LOGIN
] = "login",
2004 [SPLIT_UID
] = "uid",
2005 [SPLIT_NONE
] = "none",
2008 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2009 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");