1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
44 #include "formats-util.h"
47 #include "hostname-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
62 #include "parse-util.h"
63 #include "proc-cmdline.h"
64 #include "process-util.h"
66 #include "selinux-util.h"
67 #include "signal-util.h"
68 #include "socket-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes the oldest ones once this is reached. */
72 #define USER_JOURNALS_MAX 1024
/* Default settings. NOTE(review): presumably applied when the server is
 * initialised — they are not referenced in this part of the file; confirm. */
74 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
75 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 1000
77 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* How long the cached disk space figures stay valid before
 * determine_space_for() rescans the journal directory. */
79 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
/* NOTE(review): presumably the send buffer size for the notification socket —
 * not referenced in this part of the file; confirm at the point of use. */
81 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
/* Works out how much disk space the journal directory "<path><machine id>"
 * currently uses and how much it may still grow, and caches the outcome in
 * s->cached_space_limit, s->cached_space_available and
 * s->cached_space_timestamp.
 *
 * While the cached figures are younger than RECHECK_SPACE_USEC they are handed
 * back directly, unless a verbose report was asked for. Otherwise the sizes of
 * all regular files named *.journal or *.journal~ in the directory are summed
 * up and the enforced limit becomes
 *     MIN(MAX(usage + free space beyond keep_free, min_use), max_use).
 *
 * The results are stored through 'available' and 'limit'. Callers in this file
 * pass NULL for either of them, so presumably the stores are guarded — confirm.
 * If the directory cannot be opened or fstatvfs() fails, the problem is logged
 * and the logging helper's result is returned. */
83 static int determine_space_for(
85 JournalMetrics
*metrics
,
93 uint64_t sum
= 0, ss_avail
, avail
;
94 _cleanup_closedir_
DIR *d
= NULL
;
105 ts
= now(CLOCK_MONOTONIC
);
/* Serve the request from the cache as long as it is fresh enough. */
107 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
110 *available
= s
->cached_space_available
;
112 *limit
= s
->cached_space_limit
;
117 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
/* A missing directory (ENOENT) is only logged at debug level. */
120 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
122 if (fstatvfs(dirfd(d
), &ss
) < 0)
123 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
125 FOREACH_DIRENT_ALL(de
, d
, break) {
/* Only journal files (active or archived with a trailing "~") are counted. */
128 if (!endswith(de
->d_name
, ".journal") &&
129 !endswith(de
->d_name
, ".journal~"))
132 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
133 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
137 if (!S_ISREG(st
.st_mode
))
/* Convert the block count to bytes; the code assumes 512-byte units. */
140 sum
+= (uint64_t) st
.st_blocks
* 512UL;
143 /* If requested, then let's bump the min_use limit to the
144 * current usage on disk. We do this when starting up and
145 * first opening the journal files. This way sudden spikes in
146 * disk usage will not cause journald to vacuum files without
147 * bounds. Note that this means that only a restart of
148 * journald will make it reset this value. */
151 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
/* Free bytes on the file system, and what remains of them once keep_free
 * has been set aside. */
153 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
154 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
/* Enforced limit: current usage plus usable free space, but never below
 * min_use and never above max_use. */
156 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
157 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
158 s
->cached_space_timestamp
= ts
;
/* Scratch buffers for the six human-readable sizes in the usage report. */
161 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
162 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
164 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
165 "%s (%s) is currently using %s.\n"
166 "Maximum allowed usage is set to %s.\n"
167 "Leaving at least %s free (of currently available %s of space).\n"
168 "Enforced usage limit is thus %s, of which %s are still available.",
170 format_bytes(fb1
, sizeof(fb1
), sum
),
171 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
),
172 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
),
173 format_bytes(fb4
, sizeof(fb4
), ss_avail
),
174 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
),
175 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
));
179 *available
= s
->cached_space_available
;
181 *limit
= s
->cached_space_limit
;
/* Convenience wrapper around determine_space_for() that picks the journal to
 * measure. When s->system_journal is open, the system journal below
 * /var/log/journal/ and s->system_metrics are selected; the second group of
 * assignments selects the runtime journal below /run/log/journal/ and
 * s->runtime_metrics. All other arguments are passed through unchanged and the
 * result of determine_space_for() is returned. */
186 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
187 JournalMetrics
*metrics
;
188 const char *path
, *name
;
192 if (s
->system_journal
) {
193 path
= "/var/log/journal/";
194 metrics
= &s
->system_metrics
;
195 name
= "System journal";
197 path
= "/run/log/journal/";
198 metrics
= &s
->runtime_metrics
;
199 name
= "Runtime journal";
202 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
/* Adjusts the access rights of journal file f: the mode is set to 0640 and an
 * ACL_USER entry for uid with ACL_READ permission is looked up or created in
 * the file's ACL, which is then written back to the file.
 *
 * The function is best-effort: every failure is logged as a warning marked
 * "ignoring" and nothing is reported to the caller. The ACL steps follow a
 * check of uid against SYSTEM_UID_MAX; presumably system users are skipped —
 * the statement under that check is not visible here, confirm. */
205 void server_fix_perms(Server
*s
, JournalFile
*f
, uid_t uid
) {
208 _cleanup_(acl_freep
) acl_t acl
= NULL
;
210 acl_permset_t permset
;
215 r
= fchmod(f
->fd
, 0640);
217 log_warning_errno(errno
, "Failed to fix access mode on %s, ignoring: %m", f
->path
);
220 if (uid
<= SYSTEM_UID_MAX
)
223 acl
= acl_get_fd(f
->fd
);
225 log_warning_errno(errno
, "Failed to read ACL on %s, ignoring: %m", f
->path
);
/* Look for an existing entry for this user ... */
229 r
= acl_find_uid(acl
, uid
, &entry
);
/* ... and set up a fresh ACL_USER entry qualified with the uid. */
232 if (acl_create_entry(&acl
, &entry
) < 0 ||
233 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
234 acl_set_qualifier(entry
, &uid
) < 0) {
235 log_warning_errno(errno
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
240 /* We do not recalculate the mask unconditionally here,
241 * so that the fchmod() mask above stays intact. */
242 if (acl_get_permset(entry
, &permset
) < 0 ||
243 acl_add_perm(permset
, ACL_READ
) < 0) {
244 log_warning_errno(errno
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
248 r
= calc_acl_mask_if_needed(&acl
);
250 log_warning_errno(r
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
254 if (acl_set_fd(f
->fd
, acl
) < 0)
255 log_warning_errno(errno
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Returns the journal file that entries of user uid are to be written to.
 *
 * Order of preference, as far as visible here:
 *   1. the runtime journal, whenever it is open;
 *   2. the system journal for uids up to SYSTEM_UID_MAX;
 *   3. the per-user journal registered in s->user_journals under the uid;
 *   4. a per-user journal "/var/log/journal/<machine id>/user-<uid>.journal"
 *      that is opened (created if needed), has its permissions fixed and is
 *      registered in s->user_journals.
 * Before a new per-user journal is opened, the oldest open ones are closed
 * until fewer than USER_JOURNALS_MAX remain. Whenever a step on the per-user
 * path fails, the system journal is returned as fallback. */
260 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
261 _cleanup_free_
char *p
= NULL
;
268 /* We split up user logs only on /var, not on /run. If the
269 * runtime file is open, we write to it exclusively, in order
270 * to guarantee proper order as soon as we flush /run to
271 * /var and close the runtime file. */
273 if (s
->runtime_journal
)
274 return s
->runtime_journal
;
276 if (uid
<= SYSTEM_UID_MAX
)
277 return s
->system_journal
;
279 r
= sd_id128_get_machine(&machine
);
281 return s
->system_journal
;
/* The user journals are keyed by uid. */
283 f
= ordered_hashmap_get(s
->user_journals
, UINT32_TO_PTR(uid
));
287 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
288 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
289 return s
->system_journal
;
291 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
292 /* Too many open? Then let's close one */
293 f
= ordered_hashmap_steal_first(s
->user_journals
);
295 journal_file_close(f
);
298 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &f
);
300 return s
->system_journal
;
302 server_fix_perms(s
, f
, uid
);
304 r
= ordered_hashmap_put(s
->user_journals
, UINT32_TO_PTR(uid
), f
);
/* Registration failed: do not keep the file open, fall back instead. */
306 journal_file_close(f
);
307 return s
->system_journal
;
/* Rotates the journal file *f via journal_file_rotate() and afterwards fixes
 * the permissions of the resulting file for uid. Two different errors are
 * logged on failure: one naming the path of the file that could not be
 * rotated, one naming the journal kind ('name') that could not be created.
 * Judging by the call sites in server_rotate() the parameters are
 * (s, f, name, seal, uid) — the parameter list itself is not visible here. */
313 static int do_rotate(
326 r
= journal_file_rotate(f
, s
->compress
, seal
);
329 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
331 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
333 server_fix_perms(s
, *f
, uid
);
/* Rotates every journal the server has open: the runtime journal (never
 * sealed), the system journal and all per-user journals in s->user_journals.
 * For user journals the hashmap is updated afterwards: the entry is replaced
 * by the new file, or removed when the old file is gone. The conditions that
 * choose between these two updates are not visible here. */
338 void server_rotate(Server
*s
) {
344 log_debug("Rotating...");
/* Failures are deliberately ignored here; do_rotate() logs them itself. */
346 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
347 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
349 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
/* The hashmap key is the uid the journal belongs to. */
350 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UINT32(k
));
352 ordered_hashmap_replace(s
->user_journals
, k
, f
);
354 /* Old file has been closed and deallocated */
355 ordered_hashmap_remove(s
->user_journals
, k
);
/* Syncs the journals to disk by setting the system journal and every per-user
 * journal offline, then disables the sync timer event source (if there is one)
 * and clears s->sync_scheduled. Errors are logged; sync failures are marked
 * as ignored. */
359 void server_sync(Server
*s
) {
365 if (s
->system_journal
) {
366 r
= journal_file_set_offline(s
->system_journal
);
368 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
371 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
372 r
= journal_file_set_offline(f
);
374 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
/* The sync just happened, so a pending timer is no longer needed. */
377 if (s
->sync_event_source
) {
378 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
380 log_error_errno(r
, "Failed to disable sync timer source: %m");
383 s
->sync_scheduled
= false;
/* Vacuums one journal directory, "<path><machine id>". The size limit starts
 * out as metrics->max_use and is refined by determine_space_for(), whose
 * failure is ignored. journal_directory_vacuum() is then run with that limit,
 * metrics->n_max_files and s->max_retention_usec, and updates
 * s->oldest_file_usec. A failure other than -ENOENT is logged as a warning.
 * Judging by the calls in server_vacuum() the parameters are
 * (s, f, metrics, path, name, verbose, patch_min_use) — only part of the
 * parameter list is visible here. */
386 static void do_vacuum(
389 JournalMetrics
*metrics
,
393 bool patch_min_use
) {
407 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
409 limit
= metrics
->max_use
;
410 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
412 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
/* A directory that does not exist is not worth a warning. */
413 if (r
< 0 && r
!= -ENOENT
)
414 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
/* Vacuums both the system journal directory (/var/log/journal/) and the
 * runtime journal directory (/run/log/journal/). s->oldest_file_usec is reset
 * beforehand so that the vacuum runs can fill it in again, and the cached
 * disk space figures are zeroed afterwards, which forces the next
 * determine_space_for() call to recompute them. 'verbose' and 'patch_min_use'
 * are handed through to do_vacuum(). */
417 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
420 log_debug("Vacuuming...");
422 s
->oldest_file_usec
= 0;
424 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
425 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
/* Invalidate the space cache; a zero timestamp makes it count as stale. */
427 s
->cached_space_limit
= 0;
428 s
->cached_space_available
= 0;
429 s
->cached_space_timestamp
= 0;
/* Queries the machine ID and stores it, formatted as "_MACHINE_ID=<id>", in
 * s->machine_id_field so that it can be attached to journal entries without
 * being looked up again. */
434 static void server_cache_machine_id(Server
*s
) {
440 r
= sd_id128_get_machine(&id
);
/* stpcpy() returns the end of the prefix, where the ID string is written. */
444 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Queries the boot ID and stores it, formatted as "_BOOT_ID=<id>", in
 * s->boot_id_field so that it can be attached to journal entries without
 * being looked up again. */
447 static void server_cache_boot_id(Server
*s
) {
453 r
= sd_id128_get_boot(&id
);
/* stpcpy() returns the end of the prefix, where the ID string is written. */
457 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Builds the field "_HOSTNAME=<hostname>" from the current host name and
 * installs it as s->hostname_field, freeing the previous value. The temporary
 * host name string is released automatically when the function returns. */
460 static void server_cache_hostname(Server
*s
) {
461 _cleanup_free_
char *t
= NULL
;
466 t
= gethostname_malloc();
470 x
= strappend("_HOSTNAME=", t
);
/* Swap in the new field; the server owns the string from here on. */
474 free(s
->hostname_field
);
475 s
->hostname_field
= x
;
478 static bool shall_try_append_again(JournalFile
*f
, int r
) {
480 /* -E2BIG Hit configured limit
482 -EDQUOT Quota limit hit
484 -EIO I/O error of some kind (mmap)
485 -EHOSTDOWN Other machine
486 -EBUSY Unclean shutdown
487 -EPROTONOSUPPORT Unsupported feature
490 -ESHUTDOWN Already archived
491 -EIDRM Journal file has been deleted */
493 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
494 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
495 else if (r
== -EHOSTDOWN
)
496 log_info("%s: Journal file from other machine, rotating.", f
->path
);
497 else if (r
== -EBUSY
)
498 log_info("%s: Unclean shutdown, rotating.", f
->path
);
499 else if (r
== -EPROTONOSUPPORT
)
500 log_info("%s: Unsupported feature, rotating.", f
->path
);
501 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
502 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
504 log_warning("%s: IO error, rotating.", f
->path
);
505 else if (r
== -EIDRM
)
506 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
/* Appends one entry, given as n iovec elements, to the journal responsible for
 * uid (see find_journal()) and schedules a sync matching 'priority'.
 *
 * If the journal file suggests rotation, the server vacuums first and the
 * journal is looked up again. If the append fails with an error that
 * shall_try_append_again() considers recoverable, and no vacuum took place
 * yet, the server vacuums, looks the journal up again and retries once.
 * Remaining failures are logged and the entry is dropped ("ignoring").
 *
 * NOTE(review): both error messages print the unsigned 'n' with "%d". */
513 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
515 bool vacuumed
= false;
522 f
= find_journal(s
, uid
);
526 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
527 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
529 server_vacuum(s
, false, false);
/* The file for this uid may be a different one by now. */
532 f
= find_journal(s
, uid
);
537 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
539 server_schedule_sync(s
, priority
);
/* Give up if a vacuum already happened or the error is not recoverable. */
543 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
544 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
549 server_vacuum(s
, false, false);
551 f
= find_journal(s
, uid
);
555 log_debug("Retrying write.");
556 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
558 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
560 server_schedule_sync(s
, priority
);
/* Completes a journal entry with trusted metadata and writes it out.
 *
 * iovec holds n fields supplied by the caller and has room for m elements in
 * total; the assertion below requires space for N_IOVEC_META_FIELDS more
 * fields, plus N_IOVEC_OBJECT_FIELDS when object_pid is set. The function
 * appends, as far as the respective lookups succeed:
 *   - fields about the sender taken from ucred (_PID, _UID, _GID, _COMM,
 *     _EXE, _CMDLINE, _CAP_EFFECTIVE, _AUDIT_SESSION, _AUDIT_LOGINUID, the
 *     _SYSTEMD_* cgroup fields and _SELINUX_CONTEXT);
 *   - the matching OBJECT_* fields for the process object_pid;
 *   - _SOURCE_REALTIME_TIMESTAMP derived from tv;
 *   - the cached boot ID, machine ID and host name fields of the server.
 * Finally the uid whose journal receives the entry is chosen according to
 * s->split_mode and the entry is handed to write_to_journal().
 *
 * The numeric fields are formatted into the fixed-size stack buffers declared
 * below, each sized for its prefix plus the longest decimal representation of
 * the value's type. */
563 static void dispatch_message_real(
565 struct iovec
*iovec
, unsigned n
, unsigned m
,
566 const struct ucred
*ucred
,
567 const struct timeval
*tv
,
568 const char *label
, size_t label_len
,
573 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
574 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
575 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
576 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
577 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
578 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
579 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
580 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
586 uid_t realuid
= 0, owner
= 0, journal_uid
;
587 bool owner_valid
= false;
589 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
590 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
591 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
592 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
/* The caller must have left room for every field that may be appended. */
601 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
/* Fields describing the sending process, based on its socket credentials. */
604 realuid
= ucred
->uid
;
606 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
607 IOVEC_SET_STRING(iovec
[n
++], pid
);
609 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
610 IOVEC_SET_STRING(iovec
[n
++], uid
);
612 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
613 IOVEC_SET_STRING(iovec
[n
++], gid
);
615 r
= get_process_comm(ucred
->pid
, &t
);
617 x
= strjoina("_COMM=", t
);
619 IOVEC_SET_STRING(iovec
[n
++], x
);
622 r
= get_process_exe(ucred
->pid
, &t
);
624 x
= strjoina("_EXE=", t
);
626 IOVEC_SET_STRING(iovec
[n
++], x
);
629 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
631 x
= strjoina("_CMDLINE=", t
);
633 IOVEC_SET_STRING(iovec
[n
++], x
);
636 r
= get_process_capeff(ucred
->pid
, &t
);
638 x
= strjoina("_CAP_EFFECTIVE=", t
);
640 IOVEC_SET_STRING(iovec
[n
++], x
);
644 r
= audit_session_from_pid(ucred
->pid
, &audit
);
646 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
647 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
650 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
652 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
653 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
/* Fields derived from the sender's cgroup path. */
657 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
659 char *session
= NULL
;
661 x
= strjoina("_SYSTEMD_CGROUP=", c
);
662 IOVEC_SET_STRING(iovec
[n
++], x
);
664 r
= cg_path_get_session(c
, &t
);
666 session
= strjoina("_SYSTEMD_SESSION=", t
);
668 IOVEC_SET_STRING(iovec
[n
++], session
);
671 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
674 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
675 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
/* When the cgroup path names no unit, the unit_id passed by the caller is
 * used instead: as system unit outside a session, as user unit inside one. */
678 if (cg_path_get_unit(c
, &t
) >= 0) {
679 x
= strjoina("_SYSTEMD_UNIT=", t
);
681 IOVEC_SET_STRING(iovec
[n
++], x
);
682 } else if (unit_id
&& !session
) {
683 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
684 IOVEC_SET_STRING(iovec
[n
++], x
);
687 if (cg_path_get_user_unit(c
, &t
) >= 0) {
688 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
690 IOVEC_SET_STRING(iovec
[n
++], x
);
691 } else if (unit_id
&& session
) {
692 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
693 IOVEC_SET_STRING(iovec
[n
++], x
);
696 if (cg_path_get_slice(c
, &t
) >= 0) {
697 x
= strjoina("_SYSTEMD_SLICE=", t
);
699 IOVEC_SET_STRING(iovec
[n
++], x
);
703 } else if (unit_id
) {
704 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
705 IOVEC_SET_STRING(iovec
[n
++], x
);
/* SELinux context: built from the label handed in by the caller, or queried
 * for the sender's PID with getpidcon(). */
709 if (mac_selinux_use()) {
711 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
/* The label is copied by length and NUL-terminated explicitly. */
713 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
714 IOVEC_SET_STRING(iovec
[n
++], x
);
716 security_context_t con
;
718 if (getpidcon(ucred
->pid
, &con
) >= 0) {
719 x
= strjoina("_SELINUX_CONTEXT=", con
);
722 IOVEC_SET_STRING(iovec
[n
++], x
);
/* OBJECT_* fields: the same kind of metadata for the process object_pid. */
731 r
= get_process_uid(object_pid
, &object_uid
);
733 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
734 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
737 r
= get_process_gid(object_pid
, &object_gid
);
739 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
740 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
743 r
= get_process_comm(object_pid
, &t
);
745 x
= strjoina("OBJECT_COMM=", t
);
747 IOVEC_SET_STRING(iovec
[n
++], x
);
750 r
= get_process_exe(object_pid
, &t
);
752 x
= strjoina("OBJECT_EXE=", t
);
754 IOVEC_SET_STRING(iovec
[n
++], x
);
757 r
= get_process_cmdline(object_pid
, 0, false, &t
);
759 x
= strjoina("OBJECT_CMDLINE=", t
);
761 IOVEC_SET_STRING(iovec
[n
++], x
);
765 r
= audit_session_from_pid(object_pid
, &audit
);
767 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
768 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
771 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
773 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
774 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
778 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
780 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
781 IOVEC_SET_STRING(iovec
[n
++], x
);
783 r
= cg_path_get_session(c
, &t
);
785 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
787 IOVEC_SET_STRING(iovec
[n
++], x
);
790 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
791 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
792 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
795 if (cg_path_get_unit(c
, &t
) >= 0) {
796 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
798 IOVEC_SET_STRING(iovec
[n
++], x
);
801 if (cg_path_get_user_unit(c
, &t
) >= 0) {
802 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
804 IOVEC_SET_STRING(iovec
[n
++], x
);
/* Timestamp supplied with the message, converted by timeval_load(). */
813 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
814 IOVEC_SET_STRING(iovec
[n
++], source_time
);
817 /* Note that strictly speaking storing the boot id here is
818 * redundant since the entry includes this in-line
819 * anyway. However, we need this indexed, too. */
820 if (!isempty(s
->boot_id_field
))
821 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
823 if (!isempty(s
->machine_id_field
))
824 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
826 if (!isempty(s
->hostname_field
))
827 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
/* Pick the uid whose journal the entry goes to, depending on the split mode. */
831 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
832 /* Split up strictly by any UID */
833 journal_uid
= realuid
;
834 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
835 /* Split up by login UIDs. We do this only if the
836 * realuid is not root, in order not to accidentally
837 * leak privileged information to the user that is
838 * logged by a privileged process that is part of an
839 * unprivileged session. */
844 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
/* Logs a message that originates from journald itself.
 *
 * s:          the server.
 * message_id: message ID to attach; omitted when it equals SD_ID128_NULL.
 * format/...: printf-style message text. It is formatted with vsnprintf()
 *             into a buffer of 16 + LINE_MAX + 1 bytes, so longer texts are
 *             truncated.
 *
 * The entry carries SYSLOG_FACILITY=3, SYSLOG_IDENTIFIER=systemd-journald,
 * PRIORITY=6 and _TRANSPORT=driver, uses the credentials of the journald
 * process itself and is dispatched at LOG_INFO via dispatch_message_real().
 * The iovec array has room for the up to six fields set here plus
 * N_IOVEC_META_FIELDS metadata fields. */
847 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
848 char mid
[11 + 32 + 1];
849 char buffer
[16 + LINE_MAX
+ 1];
850 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 6];
853 struct ucred ucred
= {};
858 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
859 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
861 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
862 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
/* "MESSAGE=" is 8 bytes long; the formatted text is placed right behind it. */
864 memcpy(buffer
, "MESSAGE=", 8);
865 va_start(ap
, format
);
866 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
868 IOVEC_SET_STRING(iovec
[n
++], buffer
);
870 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
871 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
872 IOVEC_SET_STRING(iovec
[n
++], mid
);
/* The message is attributed to the journald process itself. */
875 ucred
.pid
= getpid();
876 ucred
.uid
= getuid();
877 ucred
.gid
= getgid();
879 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
/* Entry point for storing a message received from a client.
 *
 * The message is checked against s->max_level_store and the storage mode
 * STORAGE_NONE first. It is then rate-limited per cgroup: the sender's cgroup
 * path is shortened as described in the comment below and passed to
 * journal_rate_limit_test() together with the priority and the currently
 * available disk space. When messages were suppressed before, a driver
 * message reporting their number is written. Finally the message is handed to
 * dispatch_message_real() with all arguments passed through.
 *
 * iovec may only be NULL when n is 0 (asserted below). */
882 void server_dispatch_message(
884 struct iovec
*iovec
, unsigned n
, unsigned m
,
885 const struct ucred
*ucred
,
886 const struct timeval
*tv
,
887 const char *label
, size_t label_len
,
893 _cleanup_free_
char *path
= NULL
;
894 uint64_t available
= 0;
898 assert(iovec
|| n
== 0);
903 if (LOG_PRI(priority
) > s
->max_level_store
)
906 /* Stop early in case the information will not be stored
908 if (s
->storage
== STORAGE_NONE
)
914 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
918 /* example: /user/lennart/3/foobar
919 * /system/dbus.service/foobar
921 * So let's cut off everything past the third /, since that is
922 * where user directories start */
924 c
= strchr(path
, '/');
926 c
= strchr(c
+1, '/');
928 c
= strchr(c
+1, '/');
934 (void) determine_space(s
, false, false, &available
, NULL
);
935 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
939 /* Write a suppression message if we suppressed something */
941 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
942 "Suppressed %u messages from %s", rl
- 1, path
);
945 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
/* Opens the journal files the server writes to, as far as they are not open
 * already.
 *
 * System journal: attempted when storage is persistent or auto and, among
 * other conditions, the flag file /run/systemd/journal/flushed exists
 * (presumably also when flush_requested is set — that part of the condition
 * is not visible here). The file is
 * /var/log/journal/<machine id>/system.journal; failures other than -ENOENT
 * and -EROFS are logged as warnings.
 *
 * Runtime journal: attempted unless storage is "none". When the system journal
 * is open, the runtime journal is only opened if it already exists, so that
 * it can be flushed; otherwise /run/log/journal/<machine id>/system.journal
 * is created as needed, and a failure to do so is returned as an error.
 *
 * After a journal has been opened its permissions are fixed and its disk
 * usage is reported verbosely, with min_use patched up to the current usage. */
949 static int system_journal_open(Server
*s
, bool flush_requested
) {
953 if (!s
->system_journal
&&
954 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
956 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
958 /* If in auto mode: first try to create the machine
959 * path, but not the prefix.
961 * If in persistent mode: create /var/log/journal and
962 * the machine path */
964 if (s
->storage
== STORAGE_PERSISTENT
)
965 (void) mkdir_p("/var/log/journal/", 0755);
967 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
968 (void) mkdir(fn
, 0755);
970 fn
= strjoina(fn
, "/system.journal");
971 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
973 server_fix_perms(s
, s
->system_journal
, 0);
974 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
/* A missing directory or a read-only file system is not reported. */
976 if (r
!= -ENOENT
&& r
!= -EROFS
)
977 log_warning_errno(r
, "Failed to open system journal: %m");
983 if (!s
->runtime_journal
&&
984 (s
->storage
!= STORAGE_NONE
)) {
986 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
988 if (s
->system_journal
) {
990 /* Try to open the runtime journal, but only
991 * if it already exists, so that we can flush
992 * it into the system journal */
994 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
997 log_warning_errno(r
, "Failed to open runtime journal: %m");
1004 /* OK, we really need the runtime journal, so create
1005 * it if necessary. */
1007 (void) mkdir("/run/log", 0755);
1008 (void) mkdir("/run/log/journal", 0755);
1009 (void) mkdir_parents(fn
, 0750);
1011 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
1013 return log_error_errno(r
, "Failed to open runtime journal: %m");
1016 if (s
->runtime_journal
) {
1017 server_fix_perms(s
, s
->runtime_journal
, 0);
1018 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
/* Moves the contents of the runtime journal into the system journal.
 *
 * The function checks that storage is auto or persistent, that a runtime
 * journal is open, and that the system journal is available after an attempt
 * to open it; the statements following these checks are not visible here.
 * All entries of the runtime journal are then read through the sd_journal API
 * (with the data threshold disabled) and copied into the system journal one
 * by one. When a copy fails with an error shall_try_append_again() considers
 * recoverable, the server vacuums and retries once. Afterwards the runtime
 * journal is closed, /run/log/journal is removed and a driver message
 * reports the time spent and the number of entries. */
1025 int server_flush_to_var(Server
*s
) {
1027 sd_journal
*j
= NULL
;
1028 char ts
[FORMAT_TIMESPAN_MAX
];
1035 if (s
->storage
!= STORAGE_AUTO
&&
1036 s
->storage
!= STORAGE_PERSISTENT
)
1039 if (!s
->runtime_journal
)
1042 (void) system_journal_open(s
, true);
1044 if (!s
->system_journal
)
1047 log_debug("Flushing to /var...");
/* Remember the start time for the summary message at the end. */
1049 start
= now(CLOCK_MONOTONIC
);
1051 r
= sd_id128_get_machine(&machine
);
1055 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1057 return log_error_errno(r
, "Failed to read runtime journal: %m");
1059 sd_journal_set_data_threshold(j
, 0);
1061 SD_JOURNAL_FOREACH(j
) {
1065 f
= j
->current_file
;
1066 assert(f
&& f
->current_offset
> 0);
1070 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1072 log_error_errno(r
, "Can't read entry: %m");
1076 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1080 if (!shall_try_append_again(s
->system_journal
, r
)) {
1081 log_error_errno(r
, "Can't write entry: %m");
1086 server_vacuum(s
, false, false);
1088 if (!s
->system_journal
) {
1089 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1094 log_debug("Retrying write.");
1095 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1097 log_error_errno(r
, "Can't write entry: %m");
1105 journal_file_post_change(s
->system_journal
);
/* journal_file_close()'s return value resets the pointer. */
1107 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1110 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1112 sd_journal_close(j
);
1114 server_driver_message(s
, SD_ID128_NULL
, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0), n
);
/* Event loop callback for the datagram sockets of the server: the native
 * socket, the syslog socket and the audit socket (fd must be one of these,
 * see the assertion below). userdata is the Server.
 *
 * One datagram is received with recvmsg() into s->buffer, which is grown
 * beforehand to fit the pending datagram and, at the very least, an audit
 * message of maximum length. Sender credentials, the SELinux label, the
 * receive timestamp and passed file descriptors are taken from the control
 * messages. The payload is then handed to the handler matching the socket:
 *   - syslog socket: server_process_syslog_message();
 *   - native socket: server_process_native_message(), or
 *     server_process_native_file() when exactly one fd and no payload arrived;
 *   - audit socket:  server_process_audit_message().
 * Combinations of payload and fds that fit none of these are logged as
 * warnings. File descriptors received are closed again at the end. */
1119 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1120 Server
*s
= userdata
;
1121 struct ucred
*ucred
= NULL
;
1122 struct timeval
*tv
= NULL
;
1123 struct cmsghdr
*cmsg
;
1125 size_t label_len
= 0, m
;
1128 int *fds
= NULL
, v
= 0;
1132 struct cmsghdr cmsghdr
;
1134 /* We use NAME_MAX space for the SELinux label
1135 * here. The kernel currently enforces no
1136 * limit, but according to suggestions from
1137 * the SELinux people this will change and it
1138 * will probably be identical to NAME_MAX. For
1139 * now we use that, but this should be updated
1140 * one day when the final limit is known. */
1141 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1142 CMSG_SPACE(sizeof(struct timeval
)) +
1143 CMSG_SPACE(sizeof(int)) + /* fd */
1144 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1147 union sockaddr_union sa
= {};
1149 struct msghdr msghdr
= {
1152 .msg_control
= &control
,
1153 .msg_controllen
= sizeof(control
),
1155 .msg_namelen
= sizeof(sa
),
1159 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1161 if (revents
!= EPOLLIN
) {
1162 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1166 /* Try to get the right size, if we can. (Not all
1167 * sockets support SIOCINQ, hence we just try, but
1168 * don't rely on it.) */
1169 (void) ioctl(fd
, SIOCINQ
, &v
);
1171 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1172 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1174 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1176 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1179 iovec
.iov_base
= s
->buffer
;
1180 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1182 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
/* EINTR and EAGAIN are handled separately from real receive errors. */
1184 if (errno
== EINTR
|| errno
== EAGAIN
)
1187 return log_error_errno(errno
, "recvmsg() failed: %m");
/* Pick the pieces of ancillary data we are interested in. Credentials and
 * timestamp are only accepted when their length matches exactly. */
1190 CMSG_FOREACH(cmsg
, &msghdr
) {
1192 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1193 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1194 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1195 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1196 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1197 cmsg
->cmsg_type
== SCM_SECURITY
) {
1198 label
= (char*) CMSG_DATA(cmsg
);
1199 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1200 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1201 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1202 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1203 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1204 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1205 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1206 fds
= (int*) CMSG_DATA(cmsg
);
1207 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1211 /* And a trailing NUL, just in case */
1214 if (fd
== s
->syslog_fd
) {
1215 if (n
> 0 && n_fds
== 0)
1216 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1218 log_warning("Got file descriptors via syslog socket. Ignoring.");
1220 } else if (fd
== s
->native_fd
) {
1221 if (n
> 0 && n_fds
== 0)
1222 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1223 else if (n
== 0 && n_fds
== 1)
1224 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1226 log_warning("Got too many file descriptors via native socket. Ignoring.");
1229 assert(fd
== s
->audit_fd
);
1231 if (n
> 0 && n_fds
== 0)
1232 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1234 log_warning("Got file descriptors via audit socket. Ignoring.");
/* Never keep the descriptors that were passed to us. */
1237 close_many(fds
, n_fds
);
/* Signal handler registered for SIGUSR1: logs the PID of the requester,
 * flushes the runtime journal to /var, vacuums, and touches
 * /run/systemd/journal/flushed. userdata is the Server. */
1241 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1242 Server
*s
= userdata
;
1246 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1248 server_flush_to_var(s
);
1250 server_vacuum(s
, false, false);
/* system_journal_open() tests for the existence of this file. */
1252 (void) touch("/run/systemd/journal/flushed");
/* Signal handler registered for SIGUSR2, a request to rotate the journal: logs
 * the PID of the requester, runs a verbose vacuum that also patches min_use,
 * and touches /run/systemd/journal/rotated. userdata is the Server. */
1257 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1258 Server
*s
= userdata
;
1262 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1264 server_vacuum(s
, true, true);
1266 /* Let clients know when the most recent rotation happened. */
1267 (void) touch("/run/systemd/journal/rotated");
/* Signal handler registered for SIGTERM and SIGINT: logs the received signal
 * and asks the event loop to exit with code 0. userdata is the Server. */
1272 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1273 Server
*s
= userdata
;
1277 log_received_signal(LOG_INFO
, si
);
1279 sd_event_exit(s
->event
, 0);
/* Signal handler registered for SIGRTMIN+1, a request to sync: logs the PID of
 * the requester and touches /run/systemd/journal/synced so that clients can
 * tell when the sync happened. userdata is the Server. */
1283 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1284 Server
*s
= userdata
;
1288 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1292 /* Let clients know when the most recent sync happened. */
1293 (void) touch("/run/systemd/journal/synced");
/* Hooks the signals journald reacts to into the event loop s->event:
 *   SIGUSR1    -> dispatch_sigusr1   (flush runtime journal to /var)
 *   SIGUSR2    -> dispatch_sigusr2   (rotate)
 *   SIGTERM    -> dispatch_sigterm   (exit), priority NORMAL+20
 *   SIGINT     -> dispatch_sigterm   (exit), priority NORMAL+20
 *   SIGRTMIN+1 -> dispatch_sigrtmin1 (sync), priority NORMAL+15
 * The event sources are stored in the corresponding s->sig*_event_source
 * fields.
 *
 * NOTE(review): the signal mask is changed inside assert(); if assert() is
 * compiled out in some build configuration, the signals would not be
 * blocked — confirm. */
1298 static int setup_signals(Server
*s
) {
1303 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1305 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1309 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1313 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1317 /* Let's process SIGTERM late, so that we flush all queued
1318 * messages to disk before we exit */
1319 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1323 /* When journald is invoked on the terminal (when debugging),
1324 * it's useful if C-c is handled equivalent to SIGTERM. */
1325 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1329 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1333 /* SIGRTMIN+1 causes an immediate sync. We process this very
1334 * late, so that everything else queued at this point is
1335 * really written to disk. Clients can watch
1336 * /run/systemd/journal/synced with inotify until its mtime
1337 * changes to see when a sync happened. */
1338 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1342 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald settings given on the kernel command line. The command
 * line is read with proc_cmdline() and split into words; the following
 * boolean switches are recognised:
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 *   systemd.journald.forward_to_wall=
 * A value that fails to parse and any other word starting with
 * "systemd.journald" are reported with a warning and ignored. A failure to
 * read the command line is logged as a warning marked "ignoring"; a failure
 * to split it into words is returned as an error.
 *
 * The numeric offsets added to 'word' below are the lengths of the respective
 * option prefixes, i.e. they point at the value behind the '='. */
1349 static int server_parse_proc_cmdline(Server
*s
) {
1350 _cleanup_free_
char *line
= NULL
;
1354 r
= proc_cmdline(&line
);
1356 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
/* NOTE(review): 'word' has a cleanup handler but no initialiser; if
 * extract_first_word() can fail without storing to it, the handler would
 * free an indeterminate pointer — confirm. */
1362 _cleanup_free_
char *word
;
1364 r
= extract_first_word(&p
, &word
, NULL
, 0);
1366 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1371 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1372 r
= parse_boolean(word
+ 35);
1374 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1376 s
->forward_to_syslog
= r
;
1377 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1378 r
= parse_boolean(word
+ 33);
1380 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1382 s
->forward_to_kmsg
= r
;
1383 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1384 r
= parse_boolean(word
+ 36);
1386 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1388 s
->forward_to_console
= r
;
1389 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1390 r
= parse_boolean(word
+ 33);
1392 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1394 s
->forward_to_wall
= r
;
1395 } else if (startswith(word
, "systemd.journald"))
1396 log_warning("Invalid systemd.journald parameter. Ignoring.");
1399 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration by handing PKGSYSCONFDIR "/journald.conf"
 * and the "systemd/journald.conf.d" drop-in directories to
 * config_parse_many(), using journald_gperf_lookup as the item table.
 * The result of config_parse_many() is returned unchanged. */
1403 static int server_parse_config_file(Server
*s
) {
1406 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1407 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1409 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback installed on sync_event_source by server_schedule_sync().
 * userdata is the Server the timer was registered for. */
1413 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1414 Server
*s
= userdata
;
/* Requests that the journal be synced to disk.
 *
 * priority is the syslog priority of the message that triggered the
 * request; anything at LOG_CRIT or more severe is special-cased (see the
 * comment below). Otherwise, if no sync is pending yet and
 * sync_interval_usec is non-zero, a one-shot timer that invokes
 * server_dispatch_sync() is set to sync_interval_usec after the event
 * loop's current CLOCK_MONOTONIC time, and sync_scheduled is set. */
1422 int server_schedule_sync(Server
*s
, int priority
) {
1427 if (priority
<= LOG_CRIT
) {
1428 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1433 if (s
->sync_scheduled
)
1436 if (s
->sync_interval_usec
> 0) {
/* Deadline: the event loop's notion of "now" plus the configured interval. */
1439 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1443 when
+= s
->sync_interval_usec
;
1445 if (!s
->sync_event_source
) {
1446 r
= sd_event_add_time(
1448 &s
->sync_event_source
,
1451 server_dispatch_sync
, s
);
1455 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1457 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1461 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1466 s
->sync_scheduled
= true;
/* I/O callback for hostname_fd (registered in server_open_hostname()).
 * Refreshes the hostname cached in the Server passed as userdata by
 * calling server_cache_hostname(). */
1472 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1473 Server
*s
= userdata
;
1477 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, non-blocking, close-on-exec)
 * into hostname_fd and registers it with the event loop so that
 * dispatch_hostname_change() is called for it; the resulting event source
 * gets priority SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * A failure to open the file, to register the fd, or to adjust the
 * priority is logged and returned as a negative error. One registration
 * failure case is only warned about, in which case hostname_fd is closed
 * again. */
1481 static int server_open_hostname(Server
*s
) {
1486 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1487 if (s
->hostname_fd
< 0)
1488 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1490 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1492 /* kernels prior to 3.2 don't support polling this file. Ignore
1495 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1496 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1500 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1503 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1505 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback invoked when notify_fd becomes writable.
 *
 * Sends at most one pending item per invocation, in this order of
 * precedence: the initial status message (until sent_notify_ready is set),
 * a watchdog message (while send_watchdog is set), or one queued stdout
 * stream notification. All sends use MSG_DONTWAIT, and EAGAIN is checked
 * separately from the other send errors, which are logged and returned.
 * Once neither a watchdog message nor a stream notification is pending,
 * the event source disables itself. */
1510 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1511 Server
*s
= userdata
;
1515 assert(s
->notify_event_source
== es
);
1516 assert(s
->notify_fd
== fd
);
1518 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1519 * message on it. Either it's the watchdog event, the initial
1520 * READY=1 event or an stdout stream event. If there's nothing
1521 * to write anymore, turn our event source off. The next time
1522 * there's something to send it will be turned on again. */
1524 if (!s
->sent_notify_ready
) {
1525 static const char p
[] =
1527 "STATUS=Processing requests...";
1530 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1532 if (errno
== EAGAIN
)
1535 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1538 s
->sent_notify_ready
= true;
1539 log_debug("Sent READY=1 notification.");
1541 } else if (s
->send_watchdog
) {
1543 static const char p
[] =
1548 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1550 if (errno
== EAGAIN
)
1553 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1556 s
->send_watchdog
= false;
1557 log_debug("Sent WATCHDOG=1 notification.");
1559 } else if (s
->stdout_streams_notify_queue
)
1560 /* Dispatch one stream notification event */
1561 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1563 /* Leave us enabled if there's still more to do. */
1564 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1567 /* There was nothing to do anymore, let's turn ourselves off. */
1568 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1570 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of watchdog_event_source.
 *
 * Flags a watchdog message as pending (send_watchdog) and switches the
 * notify event source on so that dispatch_notify_event() can send it; a
 * failure to do so is only warned about. Then re-arms this timer for
 * usec + watchdog_usec/2, where usec is the time passed to this callback,
 * and re-enables it. Failures of those last two steps are logged and
 * returned. */
1575 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1576 Server
*s
= userdata
;
1581 s
->send_watchdog
= true;
1583 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1585 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1587 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1589 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1591 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1593 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Sets up asynchronous notification of the service manager.
 *
 * Reads $NOTIFY_SOCKET, rejects values that do not start with '@' or '/'
 * (or consist of that single character only) or that are longer than
 * sun_path, then creates a non-blocking, close-on-exec AF_UNIX datagram
 * socket in notify_fd and connects it to that address; a leading '@' is
 * replaced by a NUL byte before connecting. The socket is watched for
 * EPOLLOUT with dispatch_notify_event(). If sd_watchdog_enabled() reports
 * a watchdog interval, send_watchdog is set and a CLOCK_MONOTONIC timer
 * calling dispatch_watchdog() is added, first firing watchdog_usec/2 from
 * now.
 *
 * Must be called with notify_fd < 0 and no notify_event_source (asserted).
 * Socket, connect and event-source failures are logged and returned. */
1598 static int server_connect_notify(Server
*s
) {
1599 union sockaddr_union sa
= {
1600 .un
.sun_family
= AF_UNIX
,
1606 assert(s
->notify_fd
< 0);
1607 assert(!s
->notify_event_source
);
1610 So here's the problem: we'd like to send notification
1611 messages to PID 1, but we cannot do that via sd_notify(),
1612 since that's synchronous, and we might end up blocking on
1613 it. Specifically: given that PID 1 might block on
1614 dbus-daemon during IPC, and dbus-daemon is logging to us,
1615 and might hence block on us, we might end up in a deadlock
1616 if we block on sending PID 1 notification messages -- by
1617 generating a full blocking circle. To avoid this, let's
1618 create a non-blocking socket, and connect it to the
1619 notification socket, and then wait for POLLOUT before we
1620 send anything. This should efficiently avoid any deadlocks,
1621 as we'll never block on PID 1, hence PID 1 can safely block
1622 on dbus-daemon which can safely block on us again.
1624 Don't think that this issue is real? It is, see:
1625 https://github.com/systemd/systemd/issues/1505
1628 e
= getenv("NOTIFY_SOCKET");
1632 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1633 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1637 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1638 log_error("NOTIFY_SOCKET path too long: %s", e
);
1642 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1643 if (s
->notify_fd
< 0)
1644 return log_error_errno(errno
, "Failed to create notify socket: %m");
1646 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1648 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1649 if (sa
.un
.sun_path
[0] == '@')
1650 sa
.un
.sun_path
[0] = 0;
1652 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1654 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1656 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1658 return log_error_errno(r
, "Failed to watch notification socket: %m");
1660 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1661 s
->send_watchdog
= true;
1663 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
*3/4, dispatch_watchdog
, s
);
1665 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1668 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server object and brings up everything journald needs.
 *
 * In order: fills in built-in defaults; applies the configuration file and
 * the kernel command line on top; creates the user journal map, the mmap
 * cache and the default event loop; sorts the file descriptors received
 * from sd_listen_fds() into native, stdout, syslog and audit sockets;
 * restores streams; opens the remaining sockets, /dev/kmsg, the kernel
 * sequence number and the hostname watch; installs the signal handlers;
 * sets up udev, rate limiting and the cgroup root; caches hostname, boot ID
 * and machine ID; connects to the notification socket (best effort); and
 * finally returns the result of system_journal_open(). */
1674 int server_init(Server
*s
) {
1675 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* Mark every file descriptor as "not open". */
1682 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1686 s
->watchdog_usec
= USEC_INFINITY
;
1688 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1689 s
->sync_scheduled
= false;
1691 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1692 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1694 s
->forward_to_wall
= true;
1696 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1698 s
->max_level_store
= LOG_DEBUG
;
1699 s
->max_level_syslog
= LOG_DEBUG
;
1700 s
->max_level_kmsg
= LOG_NOTICE
;
1701 s
->max_level_console
= LOG_INFO
;
1702 s
->max_level_wall
= LOG_EMERG
;
1704 journal_reset_metrics(&s
->system_metrics
);
1705 journal_reset_metrics(&s
->runtime_metrics
);
/* The defaults set above may be overridden by the configuration file and
 * then by the kernel command line; the return values of both parsers are
 * not checked here. */
1707 server_parse_config_file(s
);
1708 server_parse_proc_cmdline(s
);
/* Rate limiting needs both an interval and a burst: if exactly one of the
 * two is zero, zero the other one as well. */
1710 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1711 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1712 s
->rate_limit_interval
, s
->rate_limit_burst
);
1713 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1716 (void) mkdir_p("/run/systemd/journal", 0755);
1718 s
->user_journals
= ordered_hashmap_new(NULL
);
1719 if (!s
->user_journals
)
1722 s
->mmap
= mmap_cache_new();
1726 r
= sd_event_default(&s
->event
);
1728 return log_error_errno(r
, "Failed to create event loop: %m");
1730 n
= sd_listen_fds(true);
1732 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify every passed-in fd by socket type and bound path; more than one
 * socket of the same kind is reported as an error. */
1734 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1736 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1738 if (s
->native_fd
>= 0) {
1739 log_error("Too many native sockets passed.");
1745 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1747 if (s
->stdout_fd
>= 0) {
1748 log_error("Too many stdout sockets passed.");
1754 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1755 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1757 if (s
->syslog_fd
>= 0) {
1758 log_error("Too many /dev/log sockets passed.");
1764 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1766 if (s
->audit_fd
>= 0) {
1767 log_error("Too many audit sockets passed.");
1781 r
= fdset_put(fds
, fd
);
1787 /* Try to restore streams, but don't bother if this fails */
1788 (void) server_restore_streams(s
, fds
);
/* Whatever is still left in the set after restoring streams is unknown to
 * us and gets closed by freeing the set. */
1790 if (fdset_size(fds
) > 0) {
1791 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1792 fds
= fdset_free(fds
);
1795 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1797 /* always open stdout, syslog, native, and kmsg sockets */
1799 /* systemd-journald.socket: /run/systemd/journal/stdout */
1800 r
= server_open_stdout_socket(s
);
1804 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1805 r
= server_open_syslog_socket(s
);
1809 /* systemd-journald.socket: /run/systemd/journal/socket */
1810 r
= server_open_native_socket(s
);
1815 r
= server_open_dev_kmsg(s
);
1819 /* Unless we got *some* sockets and not audit, open audit socket */
1820 if (s
->audit_fd
>= 0 || no_sockets
) {
1821 r
= server_open_audit(s
);
1826 r
= server_open_kernel_seqnum(s
);
1830 r
= server_open_hostname(s
);
1834 r
= setup_signals(s
);
1838 s
->udev
= udev_new();
1842 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1846 r
= cg_get_root_path(&s
->cgroup_root
);
1850 server_cache_hostname(s
);
1851 server_cache_boot_id(s
);
1852 server_cache_machine_id(s
);
/* Best effort: a failure to set up notifications does not fail startup. */
1854 (void) server_connect_notify(s
);
1856 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * timestamp on the system journal (if one is open) and on every file in
 * the user_journals map. The same timestamp is used for all files. */
1859 void server_maybe_append_tags(Server
*s
) {
1865 n
= now(CLOCK_REALTIME
);
1867 if (s
->system_journal
)
1868 journal_file_maybe_append_tag(s
->system_journal
, n
);
1870 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1871 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server holds: frees all stdout streams, closes the
 * system, runtime and per-user journal files, drops all event sources and
 * the event loop, closes all file descriptors, frees the rate limiter,
 * unmaps the kernel sequence number, frees the cached strings and drops
 * the mmap cache and udev references. */
1875 void server_done(Server
*s
) {
/* stdout_stream_free() is called on the list head until the list is empty. */
1879 while (s
->stdout_streams
)
1880 stdout_stream_free(s
->stdout_streams
);
1882 if (s
->system_journal
)
1883 journal_file_close(s
->system_journal
);
1885 if (s
->runtime_journal
)
1886 journal_file_close(s
->runtime_journal
);
/* Drain the map one file at a time, closing each, before freeing the map itself. */
1888 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1889 journal_file_close(f
);
1891 ordered_hashmap_free(s
->user_journals
);
1893 sd_event_source_unref(s
->syslog_event_source
);
1894 sd_event_source_unref(s
->native_event_source
);
1895 sd_event_source_unref(s
->stdout_event_source
);
1896 sd_event_source_unref(s
->dev_kmsg_event_source
);
1897 sd_event_source_unref(s
->audit_event_source
);
1898 sd_event_source_unref(s
->sync_event_source
);
1899 sd_event_source_unref(s
->sigusr1_event_source
);
1900 sd_event_source_unref(s
->sigusr2_event_source
);
1901 sd_event_source_unref(s
->sigterm_event_source
);
1902 sd_event_source_unref(s
->sigint_event_source
);
1903 sd_event_source_unref(s
->sigrtmin1_event_source
);
1904 sd_event_source_unref(s
->hostname_event_source
);
1905 sd_event_source_unref(s
->notify_event_source
);
1906 sd_event_source_unref(s
->watchdog_event_source
);
1907 sd_event_unref(s
->event
);
1909 safe_close(s
->syslog_fd
);
1910 safe_close(s
->native_fd
);
1911 safe_close(s
->stdout_fd
);
1912 safe_close(s
->dev_kmsg_fd
);
1913 safe_close(s
->audit_fd
);
1914 safe_close(s
->hostname_fd
);
1915 safe_close(s
->notify_fd
);
1918 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is unmapped with a length of a single uint64_t. */
1920 if (s
->kernel_seqnum
)
1921 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1925 free(s
->cgroup_root
);
1926 free(s
->hostname_field
);
1929 mmap_cache_unref(s
->mmap
);
1931 udev_unref(s
->udev
);
/* String names of the Storage enum values ("auto", "volatile",
 * "persistent", "none"), indexed by enum value. The two macro invocations
 * that follow derive the lookup helpers and the config_parse_storage()
 * parser from this table (NOTE(review): exact generated names depend on the
 * macro definitions, which live elsewhere). */
1934 static const char* const storage_table
[_STORAGE_MAX
] = {
1935 [STORAGE_AUTO
] = "auto",
1936 [STORAGE_VOLATILE
] = "volatile",
1937 [STORAGE_PERSISTENT
] = "persistent",
1938 [STORAGE_NONE
] = "none"
1941 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1942 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values ("login", "uid", "none"),
 * indexed by enum value. The two macro invocations that follow derive the
 * lookup helpers and the config_parse_split_mode() parser from this table
 * (NOTE(review): exact generated names depend on the macro definitions,
 * which live elsewhere). */
1944 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1945 [SPLIT_LOGIN
] = "login",
1946 [SPLIT_UID
] = "uid",
1947 [SPLIT_NONE
] = "none",
1950 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1951 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");