1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
76 #define USER_JOURNALS_MAX 1024
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90 static int determine_space_for(
92 JournalMetrics
*metrics
,
100 uint64_t sum
= 0, ss_avail
, avail
;
101 _cleanup_closedir_
DIR *d
= NULL
;
112 ts
= now(CLOCK_MONOTONIC
);
114 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
117 *available
= s
->cached_space_available
;
119 *limit
= s
->cached_space_limit
;
124 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
127 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
129 if (fstatvfs(dirfd(d
), &ss
) < 0)
130 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
132 FOREACH_DIRENT_ALL(de
, d
, break) {
135 if (!endswith(de
->d_name
, ".journal") &&
136 !endswith(de
->d_name
, ".journal~"))
139 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
140 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
144 if (!S_ISREG(st
.st_mode
))
147 sum
+= (uint64_t) st
.st_blocks
* 512UL;
150 /* If requested, then let's bump the min_use limit to the
151 * current usage on disk. We do this when starting up and
152 * first opening the journal files. This way sudden spikes in
153 * disk usage will not cause journald to vacuum files without
154 * bounds. Note that this means that only a restart of
155 * journald will make it reset this value. */
158 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
160 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
161 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
163 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
164 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
165 s
->cached_space_timestamp
= ts
;
168 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
169 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
170 format_bytes(fb1
, sizeof(fb1
), sum
);
171 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
172 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
173 format_bytes(fb4
, sizeof(fb4
), ss_avail
);
174 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
);
175 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
);
177 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
178 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
179 name
, path
, fb1
, fb5
, fb6
),
180 "JOURNAL_NAME=%s", name
,
181 "JOURNAL_PATH=%s", path
,
182 "CURRENT_USE=%"PRIu64
, sum
,
183 "CURRENT_USE_PRETTY=%s", fb1
,
184 "MAX_USE=%"PRIu64
, metrics
->max_use
,
185 "MAX_USE_PRETTY=%s", fb2
,
186 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
187 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
188 "DISK_AVAILABLE=%"PRIu64
, ss_avail
,
189 "DISK_AVAILABLE_PRETTY=%s", fb4
,
190 "LIMIT=%"PRIu64
, s
->cached_space_limit
,
191 "LIMIT_PRETTY=%s", fb5
,
192 "AVAILABLE=%"PRIu64
, s
->cached_space_available
,
193 "AVAILABLE_PRETTY=%s", fb6
,
198 *available
= s
->cached_space_available
;
200 *limit
= s
->cached_space_limit
;
205 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
206 JournalMetrics
*metrics
;
207 const char *path
, *name
;
211 if (s
->system_journal
) {
212 path
= "/var/log/journal/";
213 metrics
= &s
->system_metrics
;
214 name
= "System journal";
216 path
= "/run/log/journal/";
217 metrics
= &s
->runtime_metrics
;
218 name
= "Runtime journal";
221 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
224 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
231 if (uid
<= SYSTEM_UID_MAX
)
234 r
= add_acls_for_user(f
->fd
, uid
);
236 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
240 static int open_journal(
246 JournalMetrics
*metrics
,
247 JournalFile
*template,
257 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, template, &f
);
259 r
= journal_file_open(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, template, &f
);
263 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
265 journal_file_close(f
);
273 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
274 _cleanup_free_
char *p
= NULL
;
281 /* We split up user logs only on /var, not on /run. If the
282 * runtime file is open, we write to it exclusively, in order
283 * to guarantee proper order as soon as we flush /run to
284 * /var and close the runtime file. */
286 if (s
->runtime_journal
)
287 return s
->runtime_journal
;
289 if (uid
<= SYSTEM_UID_MAX
)
290 return s
->system_journal
;
292 r
= sd_id128_get_machine(&machine
);
294 return s
->system_journal
;
296 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
300 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
301 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
302 return s
->system_journal
;
304 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
305 /* Too many open? Then let's close one */
306 f
= ordered_hashmap_steal_first(s
->user_journals
);
308 journal_file_close(f
);
311 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, NULL
, &f
);
313 return s
->system_journal
;
315 server_add_acls(f
, uid
);
317 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
319 journal_file_close(f
);
320 return s
->system_journal
;
326 static int do_rotate(
339 r
= journal_file_rotate(f
, s
->compress
, seal
);
342 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
344 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
346 server_add_acls(*f
, uid
);
351 void server_rotate(Server
*s
) {
357 log_debug("Rotating...");
359 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
360 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
362 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
363 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
365 ordered_hashmap_replace(s
->user_journals
, k
, f
);
367 /* Old file has been closed and deallocated */
368 ordered_hashmap_remove(s
->user_journals
, k
);
372 void server_sync(Server
*s
) {
377 if (s
->system_journal
) {
378 r
= journal_file_set_offline(s
->system_journal
);
380 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
383 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
384 r
= journal_file_set_offline(f
);
386 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
389 if (s
->sync_event_source
) {
390 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
392 log_error_errno(r
, "Failed to disable sync timer source: %m");
395 s
->sync_scheduled
= false;
398 static void do_vacuum(
401 JournalMetrics
*metrics
,
405 bool patch_min_use
) {
419 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
421 limit
= metrics
->max_use
;
422 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
424 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
425 if (r
< 0 && r
!= -ENOENT
)
426 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
429 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
432 log_debug("Vacuuming...");
434 s
->oldest_file_usec
= 0;
436 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
437 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
439 s
->cached_space_limit
= 0;
440 s
->cached_space_available
= 0;
441 s
->cached_space_timestamp
= 0;
446 static void server_cache_machine_id(Server
*s
) {
452 r
= sd_id128_get_machine(&id
);
456 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
459 static void server_cache_boot_id(Server
*s
) {
465 r
= sd_id128_get_boot(&id
);
469 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
472 static void server_cache_hostname(Server
*s
) {
473 _cleanup_free_
char *t
= NULL
;
478 t
= gethostname_malloc();
482 x
= strappend("_HOSTNAME=", t
);
486 free(s
->hostname_field
);
487 s
->hostname_field
= x
;
490 static bool shall_try_append_again(JournalFile
*f
, int r
) {
492 /* -E2BIG Hit configured limit
494 -EDQUOT Quota limit hit
496 -EIO I/O error of some kind (mmap)
497 -EHOSTDOWN Other machine
498 -EBUSY Unclean shutdown
499 -EPROTONOSUPPORT Unsupported feature
502 -ESHUTDOWN Already archived
503 -EIDRM Journal file has been deleted */
505 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
506 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
507 else if (r
== -EHOSTDOWN
)
508 log_info("%s: Journal file from other machine, rotating.", f
->path
);
509 else if (r
== -EBUSY
)
510 log_info("%s: Unclean shutdown, rotating.", f
->path
);
511 else if (r
== -EPROTONOSUPPORT
)
512 log_info("%s: Unsupported feature, rotating.", f
->path
);
513 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
514 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
516 log_warning("%s: IO error, rotating.", f
->path
);
517 else if (r
== -EIDRM
)
518 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
525 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
527 bool vacuumed
= false;
534 f
= find_journal(s
, uid
);
538 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
539 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
541 server_vacuum(s
, false, false);
544 f
= find_journal(s
, uid
);
549 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
551 server_schedule_sync(s
, priority
);
555 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
556 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
561 server_vacuum(s
, false, false);
563 f
= find_journal(s
, uid
);
567 log_debug("Retrying write.");
568 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
570 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
572 server_schedule_sync(s
, priority
);
575 static void dispatch_message_real(
577 struct iovec
*iovec
, unsigned n
, unsigned m
,
578 const struct ucred
*ucred
,
579 const struct timeval
*tv
,
580 const char *label
, size_t label_len
,
585 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
586 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
587 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
588 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
589 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
590 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
591 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
592 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
598 uid_t realuid
= 0, owner
= 0, journal_uid
;
599 bool owner_valid
= false;
601 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
602 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
603 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
604 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
613 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
616 realuid
= ucred
->uid
;
618 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
619 IOVEC_SET_STRING(iovec
[n
++], pid
);
621 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
622 IOVEC_SET_STRING(iovec
[n
++], uid
);
624 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
625 IOVEC_SET_STRING(iovec
[n
++], gid
);
627 r
= get_process_comm(ucred
->pid
, &t
);
629 x
= strjoina("_COMM=", t
);
631 IOVEC_SET_STRING(iovec
[n
++], x
);
634 r
= get_process_exe(ucred
->pid
, &t
);
636 x
= strjoina("_EXE=", t
);
638 IOVEC_SET_STRING(iovec
[n
++], x
);
641 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
643 x
= strjoina("_CMDLINE=", t
);
645 IOVEC_SET_STRING(iovec
[n
++], x
);
648 r
= get_process_capeff(ucred
->pid
, &t
);
650 x
= strjoina("_CAP_EFFECTIVE=", t
);
652 IOVEC_SET_STRING(iovec
[n
++], x
);
656 r
= audit_session_from_pid(ucred
->pid
, &audit
);
658 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
659 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
662 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
664 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
665 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
669 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
671 char *session
= NULL
;
673 x
= strjoina("_SYSTEMD_CGROUP=", c
);
674 IOVEC_SET_STRING(iovec
[n
++], x
);
676 r
= cg_path_get_session(c
, &t
);
678 session
= strjoina("_SYSTEMD_SESSION=", t
);
680 IOVEC_SET_STRING(iovec
[n
++], session
);
683 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
686 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
687 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
690 if (cg_path_get_unit(c
, &t
) >= 0) {
691 x
= strjoina("_SYSTEMD_UNIT=", t
);
693 IOVEC_SET_STRING(iovec
[n
++], x
);
694 } else if (unit_id
&& !session
) {
695 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
696 IOVEC_SET_STRING(iovec
[n
++], x
);
699 if (cg_path_get_user_unit(c
, &t
) >= 0) {
700 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
702 IOVEC_SET_STRING(iovec
[n
++], x
);
703 } else if (unit_id
&& session
) {
704 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
705 IOVEC_SET_STRING(iovec
[n
++], x
);
708 if (cg_path_get_slice(c
, &t
) >= 0) {
709 x
= strjoina("_SYSTEMD_SLICE=", t
);
711 IOVEC_SET_STRING(iovec
[n
++], x
);
715 } else if (unit_id
) {
716 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
717 IOVEC_SET_STRING(iovec
[n
++], x
);
721 if (mac_selinux_have()) {
723 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
725 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
726 IOVEC_SET_STRING(iovec
[n
++], x
);
728 security_context_t con
;
730 if (getpidcon(ucred
->pid
, &con
) >= 0) {
731 x
= strjoina("_SELINUX_CONTEXT=", con
);
734 IOVEC_SET_STRING(iovec
[n
++], x
);
743 r
= get_process_uid(object_pid
, &object_uid
);
745 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
746 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
749 r
= get_process_gid(object_pid
, &object_gid
);
751 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
752 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
755 r
= get_process_comm(object_pid
, &t
);
757 x
= strjoina("OBJECT_COMM=", t
);
759 IOVEC_SET_STRING(iovec
[n
++], x
);
762 r
= get_process_exe(object_pid
, &t
);
764 x
= strjoina("OBJECT_EXE=", t
);
766 IOVEC_SET_STRING(iovec
[n
++], x
);
769 r
= get_process_cmdline(object_pid
, 0, false, &t
);
771 x
= strjoina("OBJECT_CMDLINE=", t
);
773 IOVEC_SET_STRING(iovec
[n
++], x
);
777 r
= audit_session_from_pid(object_pid
, &audit
);
779 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
780 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
783 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
785 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
786 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
790 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
792 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
793 IOVEC_SET_STRING(iovec
[n
++], x
);
795 r
= cg_path_get_session(c
, &t
);
797 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
799 IOVEC_SET_STRING(iovec
[n
++], x
);
802 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
803 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
804 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
807 if (cg_path_get_unit(c
, &t
) >= 0) {
808 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
810 IOVEC_SET_STRING(iovec
[n
++], x
);
813 if (cg_path_get_user_unit(c
, &t
) >= 0) {
814 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
816 IOVEC_SET_STRING(iovec
[n
++], x
);
825 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
826 IOVEC_SET_STRING(iovec
[n
++], source_time
);
829 /* Note that strictly speaking storing the boot id here is
830 * redundant since the entry includes this in-line
831 * anyway. However, we need this indexed, too. */
832 if (!isempty(s
->boot_id_field
))
833 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
835 if (!isempty(s
->machine_id_field
))
836 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
838 if (!isempty(s
->hostname_field
))
839 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
843 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
844 /* Split up strictly by any UID */
845 journal_uid
= realuid
;
846 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
847 /* Split up by login UIDs. We do this only if the
848 * realuid is not root, in order not to accidentally
849 * leak privileged information to the user that is
850 * logged by a privileged process that is part of an
851 * unprivileged session. */
856 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
859 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
860 char mid
[11 + 32 + 1];
861 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
865 struct ucred ucred
= {};
870 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
871 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
872 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
874 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
875 assert_cc(6 == LOG_INFO
);
876 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
878 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
879 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
880 IOVEC_SET_STRING(iovec
[n
++], mid
);
885 va_start(ap
, format
);
886 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
887 /* Error handling below */
890 ucred
.pid
= getpid();
891 ucred
.uid
= getuid();
892 ucred
.gid
= getgid();
895 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
898 free(iovec
[m
++].iov_base
);
901 /* We failed to format the message. Emit a warning instead. */
904 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
907 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
908 IOVEC_SET_STRING(iovec
[n
++], buf
);
909 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
913 void server_dispatch_message(
915 struct iovec
*iovec
, unsigned n
, unsigned m
,
916 const struct ucred
*ucred
,
917 const struct timeval
*tv
,
918 const char *label
, size_t label_len
,
924 _cleanup_free_
char *path
= NULL
;
925 uint64_t available
= 0;
929 assert(iovec
|| n
== 0);
934 if (LOG_PRI(priority
) > s
->max_level_store
)
937 /* Stop early in case the information will not be stored
939 if (s
->storage
== STORAGE_NONE
)
945 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
949 /* example: /user/lennart/3/foobar
950 * /system/dbus.service/foobar
952 * So let's cut off everything past the third /, since that is
953 * where user directories start */
955 c
= strchr(path
, '/');
957 c
= strchr(c
+1, '/');
959 c
= strchr(c
+1, '/');
965 (void) determine_space(s
, false, false, &available
, NULL
);
966 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
970 /* Write a suppression message if we suppressed something */
972 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
973 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
977 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
981 static int system_journal_open(Server
*s
, bool flush_requested
) {
985 if (!s
->system_journal
&&
986 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
988 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
990 /* If in auto mode: first try to create the machine
991 * path, but not the prefix.
993 * If in persistent mode: create /var/log/journal and
994 * the machine path */
996 if (s
->storage
== STORAGE_PERSISTENT
)
997 (void) mkdir_p("/var/log/journal/", 0755);
999 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
1000 (void) mkdir(fn
, 0755);
1002 fn
= strjoina(fn
, "/system.journal");
1003 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, NULL
, &s
->system_journal
);
1005 server_add_acls(s
->system_journal
, 0);
1006 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
1008 if (r
!= -ENOENT
&& r
!= -EROFS
)
1009 log_warning_errno(r
, "Failed to open system journal: %m");
1015 if (!s
->runtime_journal
&&
1016 (s
->storage
!= STORAGE_NONE
)) {
1018 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
1020 if (s
->system_journal
) {
1022 /* Try to open the runtime journal, but only
1023 * if it already exists, so that we can flush
1024 * it into the system journal */
1026 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_metrics
, NULL
, &s
->runtime_journal
);
1029 log_warning_errno(r
, "Failed to open runtime journal: %m");
1036 /* OK, we really need the runtime journal, so create
1037 * it if necessary. */
1039 (void) mkdir("/run/log", 0755);
1040 (void) mkdir("/run/log/journal", 0755);
1041 (void) mkdir_parents(fn
, 0750);
1043 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_metrics
, NULL
, &s
->runtime_journal
);
1045 return log_error_errno(r
, "Failed to open runtime journal: %m");
1048 if (s
->runtime_journal
) {
1049 server_add_acls(s
->runtime_journal
, 0);
1050 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1057 int server_flush_to_var(Server
*s
) {
1059 sd_journal
*j
= NULL
;
1060 char ts
[FORMAT_TIMESPAN_MAX
];
1067 if (s
->storage
!= STORAGE_AUTO
&&
1068 s
->storage
!= STORAGE_PERSISTENT
)
1071 if (!s
->runtime_journal
)
1074 (void) system_journal_open(s
, true);
1076 if (!s
->system_journal
)
1079 log_debug("Flushing to /var...");
1081 start
= now(CLOCK_MONOTONIC
);
1083 r
= sd_id128_get_machine(&machine
);
1087 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1089 return log_error_errno(r
, "Failed to read runtime journal: %m");
1091 sd_journal_set_data_threshold(j
, 0);
1093 SD_JOURNAL_FOREACH(j
) {
1097 f
= j
->current_file
;
1098 assert(f
&& f
->current_offset
> 0);
1102 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1104 log_error_errno(r
, "Can't read entry: %m");
1108 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1112 if (!shall_try_append_again(s
->system_journal
, r
)) {
1113 log_error_errno(r
, "Can't write entry: %m");
1118 server_vacuum(s
, false, false);
1120 if (!s
->system_journal
) {
1121 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1126 log_debug("Retrying write.");
1127 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1129 log_error_errno(r
, "Can't write entry: %m");
1137 journal_file_post_change(s
->system_journal
);
1139 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1142 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1144 sd_journal_close(j
);
1146 server_driver_message(s
, SD_ID128_NULL
,
1147 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1148 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1155 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1156 Server
*s
= userdata
;
1157 struct ucred
*ucred
= NULL
;
1158 struct timeval
*tv
= NULL
;
1159 struct cmsghdr
*cmsg
;
1161 size_t label_len
= 0, m
;
1164 int *fds
= NULL
, v
= 0;
1168 struct cmsghdr cmsghdr
;
1170 /* We use NAME_MAX space for the SELinux label
1171 * here. The kernel currently enforces no
1172 * limit, but according to suggestions from
1173 * the SELinux people this will change and it
1174 * will probably be identical to NAME_MAX. For
1175 * now we use that, but this should be updated
1176 * one day when the final limit is known. */
1177 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1178 CMSG_SPACE(sizeof(struct timeval
)) +
1179 CMSG_SPACE(sizeof(int)) + /* fd */
1180 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1183 union sockaddr_union sa
= {};
1185 struct msghdr msghdr
= {
1188 .msg_control
= &control
,
1189 .msg_controllen
= sizeof(control
),
1191 .msg_namelen
= sizeof(sa
),
1195 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1197 if (revents
!= EPOLLIN
) {
1198 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1202 /* Try to get the right size, if we can. (Not all
1203 * sockets support SIOCINQ, hence we just try, but
1204 * don't rely on it.) */
1205 (void) ioctl(fd
, SIOCINQ
, &v
);
1207 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1208 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1210 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1212 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1215 iovec
.iov_base
= s
->buffer
;
1216 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1218 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1220 if (errno
== EINTR
|| errno
== EAGAIN
)
1223 return log_error_errno(errno
, "recvmsg() failed: %m");
1226 CMSG_FOREACH(cmsg
, &msghdr
) {
1228 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1229 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1230 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1231 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1232 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1233 cmsg
->cmsg_type
== SCM_SECURITY
) {
1234 label
= (char*) CMSG_DATA(cmsg
);
1235 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1236 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1237 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1238 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1239 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1240 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1241 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1242 fds
= (int*) CMSG_DATA(cmsg
);
1243 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1247 /* And a trailing NUL, just in case */
1250 if (fd
== s
->syslog_fd
) {
1251 if (n
> 0 && n_fds
== 0)
1252 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1254 log_warning("Got file descriptors via syslog socket. Ignoring.");
1256 } else if (fd
== s
->native_fd
) {
1257 if (n
> 0 && n_fds
== 0)
1258 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1259 else if (n
== 0 && n_fds
== 1)
1260 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1262 log_warning("Got too many file descriptors via native socket. Ignoring.");
1265 assert(fd
== s
->audit_fd
);
1267 if (n
> 0 && n_fds
== 0)
1268 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1270 log_warning("Got file descriptors via audit socket. Ignoring.");
1273 close_many(fds
, n_fds
);
1277 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1278 Server
*s
= userdata
;
1283 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1285 server_flush_to_var(s
);
1287 server_vacuum(s
, false, false);
1289 r
= touch("/run/systemd/journal/flushed");
1291 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1296 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1297 Server
*s
= userdata
;
1302 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1304 server_vacuum(s
, true, true);
1306 /* Let clients know when the most recent rotation happened. */
1307 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1309 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1314 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1315 Server
*s
= userdata
;
1319 log_received_signal(LOG_INFO
, si
);
1321 sd_event_exit(s
->event
, 0);
1325 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1326 Server
*s
= userdata
;
1331 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1335 /* Let clients know when the most recent sync happened. */
1336 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1338 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1343 static int setup_signals(Server
*s
) {
1348 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1350 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1354 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1358 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1362 /* Let's process SIGTERM late, so that we flush all queued
1363 * messages to disk before we exit */
1364 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1368 /* When journald is invoked on the terminal (when debugging),
1369 * it's useful if C-c is handled equivalent to SIGTERM. */
1370 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1374 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1378 /* SIGRTMIN+1 causes an immediate sync. We process this very
1379 * late, so that everything else queued at this point is
1380 * really written to disk. Clients can watch
1381 * /run/systemd/journal/synced with inotify until its mtime
1382 * changes to see when a sync happened. */
1383 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1387 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald settings found on the kernel command line.
 *
 * The command line is fetched with proc_cmdline() and split into words with
 * extract_first_word(). The recognized switches are
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 *   systemd.journald.forward_to_wall=
 * whose values go through parse_boolean() and are stored in the matching
 * s->forward_to_* field. Any other word starting with "systemd.journald"
 * is reported as an invalid parameter with a warning. */
1394 static int server_parse_proc_cmdline(Server
*s
) {
1395 _cleanup_free_
char *line
= NULL
;
1399 r
= proc_cmdline(&line
);
1401 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1407 _cleanup_free_
char *word
= NULL
;
1409 r
= extract_first_word(&p
, &word
, NULL
, 0);
1411 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
/* The numeric offsets used below (35, 33, 36, 33) are the lengths of the
 * respective prefixes, so "word + N" points at the value after the '='. */
1416 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1417 r
= parse_boolean(word
+ 35);
1419 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1421 s
->forward_to_syslog
= r
;
1422 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1423 r
= parse_boolean(word
+ 33);
1425 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1427 s
->forward_to_kmsg
= r
;
1428 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1429 r
= parse_boolean(word
+ 36);
1431 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1433 s
->forward_to_console
= r
;
1434 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1435 r
= parse_boolean(word
+ 33);
1437 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1439 s
->forward_to_wall
= r
;
1440 } else if (startswith(word
, "systemd.journald"))
1441 log_warning("Invalid systemd.journald parameter. Ignoring.");
1444 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration through config_parse_many(): the main
 * file PKGSYSCONFDIR "/journald.conf" plus the "systemd/journald.conf.d"
 * locations expanded by CONF_PATHS_NULSTR(). Items are resolved with
 * config_item_perf_lookup using journald_gperf_lookup. The result of
 * config_parse_many() is returned to the caller unchanged. */
1448 static int server_parse_config_file(Server
*s
) {
1451 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1452 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1454 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback that server_schedule_sync() registers for
 * s->sync_event_source via sd_event_add_time(). userdata is the Server. */
1458 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1459 Server
*s
= userdata
;
/* Requests that the journal be synced to disk.
 *
 * Messages with priority <= LOG_CRIT (CRIT, ALERT, EMERG) are handled as an
 * immediate sync. For everything else, and only when s->sync_interval_usec
 * is non-zero, a one-shot timer is armed: the current CLOCK_MONOTONIC event
 * loop time plus s->sync_interval_usec. The timer source
 * s->sync_event_source is created with server_dispatch_sync() as callback
 * and SD_EVENT_PRIORITY_IMPORTANT priority when it does not exist yet.
 * s->sync_scheduled is checked on entry and set once the timer is armed. */
1467 int server_schedule_sync(Server
*s
, int priority
) {
1472 if (priority
<= LOG_CRIT
) {
1473 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1478 if (s
->sync_scheduled
)
1481 if (s
->sync_interval_usec
> 0) {
1484 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1488 when
+= s
->sync_interval_usec
;
1490 if (!s
->sync_event_source
) {
1491 r
= sd_event_add_time(
1493 &s
->sync_event_source
,
1496 server_dispatch_sync
, s
);
1500 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1502 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1506 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1511 s
->sync_scheduled
= true;
/* I/O callback attached to s->hostname_fd by server_open_hostname().
 * userdata is the Server; the handler refreshes the cached hostname by
 * calling server_cache_hostname(). */
1517 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1518 Server
*s
= userdata
;
1522 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking,
 * no controlling tty) into s->hostname_fd and registers it with the event
 * loop using an event mask of 0 and dispatch_hostname_change() as callback.
 *
 * If the open fails, the errno is logged and returned. On the registration
 * failure that is logged as "ignoring", the fd is closed again with
 * safe_close(); other registration failures are returned as errors. On
 * success the event source priority is set to
 * SD_EVENT_PRIORITY_IMPORTANT-10. */
1526 static int server_open_hostname(Server
*s
) {
1531 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1532 if (s
->hostname_fd
< 0)
1533 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1535 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1537 /* kernels prior to 3.2 don't support polling this file. Ignore
1540 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1541 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1545 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1548 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1550 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (s->notify_fd), registered for
 * EPOLLOUT by server_connect_notify(). userdata is the Server.
 *
 * Each invocation handles at most one pending item, checked in this order:
 * the initial ready/status message (until s->sent_notify_ready is set), a
 * watchdog message (while s->send_watchdog is set), then one entry of
 * s->stdout_streams_notify_queue. Messages are sent with MSG_DONTWAIT and
 * EAGAIN is checked separately from other send errors, which are logged.
 * When neither a watchdog message nor a queued stream notification remains,
 * the event source switches itself to SD_EVENT_OFF. */
1555 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1556 Server
*s
= userdata
;
1560 assert(s
->notify_event_source
== es
);
1561 assert(s
->notify_fd
== fd
);
1563 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1564 * message on it. Either it's the watchdog event, the initial
1565 * READY=1 event or an stdout stream event. If there's nothing
1566 * to write anymore, turn our event source off. The next time
1567 * there's something to send it will be turned on again. */
1569 if (!s
->sent_notify_ready
) {
1570 static const char p
[] =
1572 "STATUS=Processing requests...";
1575 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1577 if (errno
== EAGAIN
)
1580 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1583 s
->sent_notify_ready
= true;
1584 log_debug("Sent READY=1 notification.");
1586 } else if (s
->send_watchdog
) {
1588 static const char p
[] =
1593 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1595 if (errno
== EAGAIN
)
1598 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1601 s
->send_watchdog
= false;
1602 log_debug("Sent WATCHDOG=1 notification.");
1604 } else if (s
->stdout_streams_notify_queue
)
1605 /* Dispatch one stream notification event */
1606 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1608 /* Leave us enabled if there's still more to do. */
1609 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1612 /* There was nothing to do anymore, let's turn ourselves off. */
1613 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1615 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of s->watchdog_event_source (set up in
 * server_connect_notify()). userdata is the Server.
 *
 * It flags that a watchdog message is due (s->send_watchdog) and switches
 * s->notify_event_source on so that dispatch_notify_event() gets a chance to
 * send it; a failure to do so is only logged as a warning. The timer is then
 * re-armed for usec + s->watchdog_usec / 2 and enabled again; failures of
 * those two steps are logged and returned. */
1620 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1621 Server
*s
= userdata
;
1626 s
->send_watchdog
= true;
1628 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1630 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1632 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1634 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1636 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1638 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking datagram socket to the address named by
 * $NOTIFY_SOCKET and registers it with the event loop.
 *
 * The variable must start with '@' or '/' and have at least one more
 * character; a leading '@' is replaced by a NUL byte in sun_path before
 * connect(). Values longer than sizeof(sun_path) are rejected. The send
 * buffer is enlarged via fd_inc_sndbuf() on a best-effort basis (result
 * ignored). The socket is then watched for EPOLLOUT with
 * dispatch_notify_event() as callback.
 *
 * If sd_watchdog_enabled() reports an active watchdog, s->send_watchdog is
 * set and a CLOCK_MONOTONIC timer (dispatch_watchdog()) is added, first due
 * at now + watchdog_usec/2 with an accuracy of watchdog_usec/4.
 *
 * Must be called with s->notify_fd < 0 and no notify event source yet
 * (both asserted). */
1643 static int server_connect_notify(Server
*s
) {
1644 union sockaddr_union sa
= {
1645 .un
.sun_family
= AF_UNIX
,
1651 assert(s
->notify_fd
< 0);
1652 assert(!s
->notify_event_source
);
1655 So here's the problem: we'd like to send notification
1656 messages to PID 1, but we cannot do that via sd_notify(),
1657 since that's synchronous, and we might end up blocking on
1658 it. Specifically: given that PID 1 might block on
1659 dbus-daemon during IPC, and dbus-daemon is logging to us,
1660 and might hence block on us, we might end up in a deadlock
1661 if we block on sending PID 1 notification messages -- by
1662 generating a full blocking circle. To avoid this, let's
1663 create a non-blocking socket, and connect it to the
1664 notification socket, and then wait for POLLOUT before we
1665 send anything. This should efficiently avoid any deadlocks,
1666 as we'll never block on PID 1, hence PID 1 can safely block
1667 on dbus-daemon which can safely block on us again.
1669 Don't think that this issue is real? It is, see:
1670 https://github.com/systemd/systemd/issues/1505
1673 e
= getenv("NOTIFY_SOCKET");
1677 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1678 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1682 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1683 log_error("NOTIFY_SOCKET path too long: %s", e
);
1687 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1688 if (s
->notify_fd
< 0)
1689 return log_error_errno(errno
, "Failed to create notify socket: %m");
1691 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1693 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1694 if (sa
.un
.sun_path
[0] == '@')
1695 sa
.un
.sun_path
[0] = 0;
1697 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1699 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1701 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1703 return log_error_errno(r
, "Failed to watch notification socket: %m");
1705 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1706 s
->send_watchdog
= true;
1708 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1710 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1713 /* This should fire pretty soon, which we'll use to send the
/* Brings a Server object into its operational state.
 *
 * Steps, in order:
 *  - all socket/file descriptors are preset to -1 and the built-in defaults
 *    (sync interval, rate limit, forwarding, max file age, per-target max
 *    log levels, journal metrics) are applied;
 *  - the configuration file and then the kernel command line are parsed;
 *    their return values are not used here;
 *  - if exactly one of rate_limit_interval / rate_limit_burst is zero, both
 *    are reset to zero;
 *  - /run/systemd/journal is created (best effort), the user journal map and
 *    the mmap cache are allocated, and the default event loop is acquired;
 *  - file descriptors passed in via sd_listen_fds() are classified as native,
 *    stdout, syslog or audit socket (at most one of each is accepted); the
 *    others are collected in an FDSet, offered to server_restore_streams(),
 *    and any left over are closed with a warning;
 *  - stdout, syslog and native sockets and /dev/kmsg are always opened; the
 *    audit socket only if one was passed in or no sockets were passed at all;
 *  - kernel sequence number, hostname watch, signal handlers, udev, rate
 *    limiter and cgroup root are set up, hostname/boot ID/machine ID are
 *    cached, and the notification socket is connected (best effort).
 *
 * The function finally returns the result of system_journal_open(s, false).
 * Failures of the event loop and sd_listen_fds() steps are logged and
 * returned. */
1719 int server_init(Server
*s
) {
1720 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1727 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1731 s
->watchdog_usec
= USEC_INFINITY
;
1733 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1734 s
->sync_scheduled
= false;
1736 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1737 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1739 s
->forward_to_wall
= true;
1741 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1743 s
->max_level_store
= LOG_DEBUG
;
1744 s
->max_level_syslog
= LOG_DEBUG
;
1745 s
->max_level_kmsg
= LOG_NOTICE
;
1746 s
->max_level_console
= LOG_INFO
;
1747 s
->max_level_wall
= LOG_EMERG
;
1749 journal_reset_metrics(&s
->system_metrics
);
1750 journal_reset_metrics(&s
->runtime_metrics
);
1752 server_parse_config_file(s
);
1753 server_parse_proc_cmdline(s
);
1755 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1756 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1757 s
->rate_limit_interval
, s
->rate_limit_burst
);
1758 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1761 (void) mkdir_p("/run/systemd/journal", 0755);
1763 s
->user_journals
= ordered_hashmap_new(NULL
);
1764 if (!s
->user_journals
)
1767 s
->mmap
= mmap_cache_new();
1771 r
= sd_event_default(&s
->event
);
1773 return log_error_errno(r
, "Failed to create event loop: %m");
1775 n
= sd_listen_fds(true);
1777 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1779 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1781 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1783 if (s
->native_fd
>= 0) {
1784 log_error("Too many native sockets passed.");
1790 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1792 if (s
->stdout_fd
>= 0) {
1793 log_error("Too many stdout sockets passed.");
1799 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1800 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1802 if (s
->syslog_fd
>= 0) {
1803 log_error("Too many /dev/log sockets passed.");
1809 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1811 if (s
->audit_fd
>= 0) {
1812 log_error("Too many audit sockets passed.");
1826 r
= fdset_put(fds
, fd
);
1832 /* Try to restore streams, but don't bother if this fails */
1833 (void) server_restore_streams(s
, fds
);
1835 if (fdset_size(fds
) > 0) {
1836 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1837 fds
= fdset_free(fds
);
1840 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1842 /* always open stdout, syslog, native, and kmsg sockets */
1844 /* systemd-journald.socket: /run/systemd/journal/stdout */
1845 r
= server_open_stdout_socket(s
);
1849 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1850 r
= server_open_syslog_socket(s
);
1854 /* systemd-journald.socket: /run/systemd/journal/socket */
1855 r
= server_open_native_socket(s
);
1860 r
= server_open_dev_kmsg(s
);
1864 /* Unless we got *some* sockets and not audit, open audit socket */
1865 if (s
->audit_fd
>= 0 || no_sockets
) {
1866 r
= server_open_audit(s
);
1871 r
= server_open_kernel_seqnum(s
);
1875 r
= server_open_hostname(s
);
1879 r
= setup_signals(s
);
1883 s
->udev
= udev_new();
1887 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1891 r
= cg_get_root_path(&s
->cgroup_root
);
1895 server_cache_hostname(s
);
1896 server_cache_boot_id(s
);
1897 server_cache_machine_id(s
);
1899 (void) server_connect_notify(s
);
1901 return system_journal_open(s
, false);
/* Gives every open journal file the chance to append a tag: the current
 * CLOCK_REALTIME timestamp is taken once and handed to
 * journal_file_maybe_append_tag() for the system journal (when it is open)
 * and for each file in s->user_journals. */
1904 void server_maybe_append_tags(Server
*s
) {
1910 n
= now(CLOCK_REALTIME
);
1912 if (s
->system_journal
)
1913 journal_file_maybe_append_tag(s
->system_journal
, n
);
1915 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1916 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server owns.
 *
 * Order of teardown: all stdout streams are freed, the system, runtime and
 * user journal files are closed (user journals are stolen from the map one
 * by one, then the map itself is freed), every event source and the event
 * loop are unreferenced, all descriptors are closed with safe_close(), and
 * finally the rate limiter, the kernel sequence number mapping (a
 * uint64_t-sized munmap), the cgroup root and hostname field strings, the
 * mmap cache and the udev context are released. */
1920 void server_done(Server
*s
) {
1924 while (s
->stdout_streams
)
1925 stdout_stream_free(s
->stdout_streams
);
1927 if (s
->system_journal
)
1928 journal_file_close(s
->system_journal
);
1930 if (s
->runtime_journal
)
1931 journal_file_close(s
->runtime_journal
);
1933 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1934 journal_file_close(f
);
1936 ordered_hashmap_free(s
->user_journals
);
1938 sd_event_source_unref(s
->syslog_event_source
);
1939 sd_event_source_unref(s
->native_event_source
);
1940 sd_event_source_unref(s
->stdout_event_source
);
1941 sd_event_source_unref(s
->dev_kmsg_event_source
);
1942 sd_event_source_unref(s
->audit_event_source
);
1943 sd_event_source_unref(s
->sync_event_source
);
1944 sd_event_source_unref(s
->sigusr1_event_source
);
1945 sd_event_source_unref(s
->sigusr2_event_source
);
1946 sd_event_source_unref(s
->sigterm_event_source
);
1947 sd_event_source_unref(s
->sigint_event_source
);
1948 sd_event_source_unref(s
->sigrtmin1_event_source
);
1949 sd_event_source_unref(s
->hostname_event_source
);
1950 sd_event_source_unref(s
->notify_event_source
);
1951 sd_event_source_unref(s
->watchdog_event_source
);
1952 sd_event_unref(s
->event
);
1954 safe_close(s
->syslog_fd
);
1955 safe_close(s
->native_fd
);
1956 safe_close(s
->stdout_fd
);
1957 safe_close(s
->dev_kmsg_fd
);
1958 safe_close(s
->audit_fd
);
1959 safe_close(s
->hostname_fd
);
1960 safe_close(s
->notify_fd
);
1963 journal_rate_limit_free(s
->rate_limit
);
1965 if (s
->kernel_seqnum
)
1966 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1970 free(s
->cgroup_root
);
1971 free(s
->hostname_field
);
1974 mmap_cache_unref(s
->mmap
);
1976 udev_unref(s
->udev
);
/* Names of the Storage enum values ("auto", "volatile", "persistent",
 * "none"), indexed by enum value. The two macros that follow generate the
 * string lookup helpers for Storage and the config_parse_storage() parser,
 * which reports "Failed to parse storage setting" on bad input. */
1979 static const char* const storage_table
[_STORAGE_MAX
] = {
1980 [STORAGE_AUTO
] = "auto",
1981 [STORAGE_VOLATILE
] = "volatile",
1982 [STORAGE_PERSISTENT
] = "persistent",
1983 [STORAGE_NONE
] = "none"
1986 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1987 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Names of the SplitMode enum values ("login", "uid", "none"), indexed by
 * enum value. The two macros that follow generate the string lookup helpers
 * for SplitMode and the config_parse_split_mode() parser, which reports
 * "Failed to parse split mode setting" on bad input. */
1989 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1990 [SPLIT_LOGIN
] = "login",
1991 [SPLIT_UID
] = "uid",
1992 [SPLIT_NONE
] = "none",
1995 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1996 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");