1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
74 #define USER_JOURNALS_MAX 1024
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85 static int determine_space_for(
87 JournalMetrics
*metrics
,
95 uint64_t sum
= 0, ss_avail
, avail
;
96 _cleanup_closedir_
DIR *d
= NULL
;
107 ts
= now(CLOCK_MONOTONIC
);
109 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
112 *available
= s
->cached_space_available
;
114 *limit
= s
->cached_space_limit
;
119 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
122 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
124 if (fstatvfs(dirfd(d
), &ss
) < 0)
125 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
127 FOREACH_DIRENT_ALL(de
, d
, break) {
130 if (!endswith(de
->d_name
, ".journal") &&
131 !endswith(de
->d_name
, ".journal~"))
134 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
135 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
139 if (!S_ISREG(st
.st_mode
))
142 sum
+= (uint64_t) st
.st_blocks
* 512UL;
145 /* If requested, then let's bump the min_use limit to the
146 * current usage on disk. We do this when starting up and
147 * first opening the journal files. This way sudden spikes in
148 * disk usage will not cause journald to vacuum files without
149 * bounds. Note that this means that only a restart of
150 * journald will make it reset this value. */
153 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
155 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
156 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
158 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
159 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
160 s
->cached_space_timestamp
= ts
;
163 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
164 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
166 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
167 "%s (%s) is currently using %s.\n"
168 "Maximum allowed usage is set to %s.\n"
169 "Leaving at least %s free (of currently available %s of space).\n"
170 "Enforced usage limit is thus %s, of which %s are still available.",
172 format_bytes(fb1
, sizeof(fb1
), sum
),
173 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
),
174 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
),
175 format_bytes(fb4
, sizeof(fb4
), ss_avail
),
176 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
),
177 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
));
181 *available
= s
->cached_space_available
;
183 *limit
= s
->cached_space_limit
;
188 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
189 JournalMetrics
*metrics
;
190 const char *path
, *name
;
194 if (s
->system_journal
) {
195 path
= "/var/log/journal/";
196 metrics
= &s
->system_metrics
;
197 name
= "System journal";
199 path
= "/run/log/journal/";
200 metrics
= &s
->runtime_metrics
;
201 name
= "Runtime journal";
204 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
207 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
214 if (uid
<= SYSTEM_UID_MAX
)
217 r
= add_acls_for_user(f
->fd
, uid
);
219 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
223 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
224 _cleanup_free_
char *p
= NULL
;
231 /* We split up user logs only on /var, not on /run. If the
232 * runtime file is open, we write to it exclusively, in order
233 * to guarantee proper order as soon as we flush /run to
234 * /var and close the runtime file. */
236 if (s
->runtime_journal
)
237 return s
->runtime_journal
;
239 if (uid
<= SYSTEM_UID_MAX
)
240 return s
->system_journal
;
242 r
= sd_id128_get_machine(&machine
);
244 return s
->system_journal
;
246 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
250 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
251 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
252 return s
->system_journal
;
254 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
255 /* Too many open? Then let's close one */
256 f
= ordered_hashmap_steal_first(s
->user_journals
);
258 journal_file_close(f
);
261 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &f
);
263 return s
->system_journal
;
265 server_add_acls(f
, uid
);
267 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
269 journal_file_close(f
);
270 return s
->system_journal
;
276 static int do_rotate(
289 r
= journal_file_rotate(f
, s
->compress
, seal
);
292 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
294 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
296 server_add_acls(*f
, uid
);
301 void server_rotate(Server
*s
) {
307 log_debug("Rotating...");
309 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
310 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
312 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
313 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
315 ordered_hashmap_replace(s
->user_journals
, k
, f
);
317 /* Old file has been closed and deallocated */
318 ordered_hashmap_remove(s
->user_journals
, k
);
322 void server_sync(Server
*s
) {
327 if (s
->system_journal
) {
328 r
= journal_file_set_offline(s
->system_journal
);
330 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
333 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
334 r
= journal_file_set_offline(f
);
336 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
339 if (s
->sync_event_source
) {
340 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
342 log_error_errno(r
, "Failed to disable sync timer source: %m");
345 s
->sync_scheduled
= false;
348 static void do_vacuum(
351 JournalMetrics
*metrics
,
355 bool patch_min_use
) {
369 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
371 limit
= metrics
->max_use
;
372 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
374 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
375 if (r
< 0 && r
!= -ENOENT
)
376 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
379 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
382 log_debug("Vacuuming...");
384 s
->oldest_file_usec
= 0;
386 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
387 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
389 s
->cached_space_limit
= 0;
390 s
->cached_space_available
= 0;
391 s
->cached_space_timestamp
= 0;
396 static void server_cache_machine_id(Server
*s
) {
402 r
= sd_id128_get_machine(&id
);
406 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
409 static void server_cache_boot_id(Server
*s
) {
415 r
= sd_id128_get_boot(&id
);
419 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
422 static void server_cache_hostname(Server
*s
) {
423 _cleanup_free_
char *t
= NULL
;
428 t
= gethostname_malloc();
432 x
= strappend("_HOSTNAME=", t
);
436 free(s
->hostname_field
);
437 s
->hostname_field
= x
;
440 static bool shall_try_append_again(JournalFile
*f
, int r
) {
442 /* -E2BIG Hit configured limit
444 -EDQUOT Quota limit hit
446 -EIO I/O error of some kind (mmap)
447 -EHOSTDOWN Other machine
448 -EBUSY Unclean shutdown
449 -EPROTONOSUPPORT Unsupported feature
452 -ESHUTDOWN Already archived
453 -EIDRM Journal file has been deleted */
455 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
456 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
457 else if (r
== -EHOSTDOWN
)
458 log_info("%s: Journal file from other machine, rotating.", f
->path
);
459 else if (r
== -EBUSY
)
460 log_info("%s: Unclean shutdown, rotating.", f
->path
);
461 else if (r
== -EPROTONOSUPPORT
)
462 log_info("%s: Unsupported feature, rotating.", f
->path
);
463 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
464 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
466 log_warning("%s: IO error, rotating.", f
->path
);
467 else if (r
== -EIDRM
)
468 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
475 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
477 bool vacuumed
= false;
484 f
= find_journal(s
, uid
);
488 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
489 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
491 server_vacuum(s
, false, false);
494 f
= find_journal(s
, uid
);
499 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
501 server_schedule_sync(s
, priority
);
505 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
506 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
511 server_vacuum(s
, false, false);
513 f
= find_journal(s
, uid
);
517 log_debug("Retrying write.");
518 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
520 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
522 server_schedule_sync(s
, priority
);
525 static void dispatch_message_real(
527 struct iovec
*iovec
, unsigned n
, unsigned m
,
528 const struct ucred
*ucred
,
529 const struct timeval
*tv
,
530 const char *label
, size_t label_len
,
535 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
536 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
537 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
538 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
539 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
540 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
541 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
542 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
548 uid_t realuid
= 0, owner
= 0, journal_uid
;
549 bool owner_valid
= false;
551 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
552 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
553 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
554 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
563 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
566 realuid
= ucred
->uid
;
568 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
569 IOVEC_SET_STRING(iovec
[n
++], pid
);
571 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
572 IOVEC_SET_STRING(iovec
[n
++], uid
);
574 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
575 IOVEC_SET_STRING(iovec
[n
++], gid
);
577 r
= get_process_comm(ucred
->pid
, &t
);
579 x
= strjoina("_COMM=", t
);
581 IOVEC_SET_STRING(iovec
[n
++], x
);
584 r
= get_process_exe(ucred
->pid
, &t
);
586 x
= strjoina("_EXE=", t
);
588 IOVEC_SET_STRING(iovec
[n
++], x
);
591 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
593 x
= strjoina("_CMDLINE=", t
);
595 IOVEC_SET_STRING(iovec
[n
++], x
);
598 r
= get_process_capeff(ucred
->pid
, &t
);
600 x
= strjoina("_CAP_EFFECTIVE=", t
);
602 IOVEC_SET_STRING(iovec
[n
++], x
);
606 r
= audit_session_from_pid(ucred
->pid
, &audit
);
608 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
609 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
612 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
614 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
615 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
619 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
621 char *session
= NULL
;
623 x
= strjoina("_SYSTEMD_CGROUP=", c
);
624 IOVEC_SET_STRING(iovec
[n
++], x
);
626 r
= cg_path_get_session(c
, &t
);
628 session
= strjoina("_SYSTEMD_SESSION=", t
);
630 IOVEC_SET_STRING(iovec
[n
++], session
);
633 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
636 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
637 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
640 if (cg_path_get_unit(c
, &t
) >= 0) {
641 x
= strjoina("_SYSTEMD_UNIT=", t
);
643 IOVEC_SET_STRING(iovec
[n
++], x
);
644 } else if (unit_id
&& !session
) {
645 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
646 IOVEC_SET_STRING(iovec
[n
++], x
);
649 if (cg_path_get_user_unit(c
, &t
) >= 0) {
650 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
652 IOVEC_SET_STRING(iovec
[n
++], x
);
653 } else if (unit_id
&& session
) {
654 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
655 IOVEC_SET_STRING(iovec
[n
++], x
);
658 if (cg_path_get_slice(c
, &t
) >= 0) {
659 x
= strjoina("_SYSTEMD_SLICE=", t
);
661 IOVEC_SET_STRING(iovec
[n
++], x
);
665 } else if (unit_id
) {
666 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
667 IOVEC_SET_STRING(iovec
[n
++], x
);
671 if (mac_selinux_have()) {
673 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
675 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
676 IOVEC_SET_STRING(iovec
[n
++], x
);
678 security_context_t con
;
680 if (getpidcon(ucred
->pid
, &con
) >= 0) {
681 x
= strjoina("_SELINUX_CONTEXT=", con
);
684 IOVEC_SET_STRING(iovec
[n
++], x
);
693 r
= get_process_uid(object_pid
, &object_uid
);
695 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
696 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
699 r
= get_process_gid(object_pid
, &object_gid
);
701 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
702 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
705 r
= get_process_comm(object_pid
, &t
);
707 x
= strjoina("OBJECT_COMM=", t
);
709 IOVEC_SET_STRING(iovec
[n
++], x
);
712 r
= get_process_exe(object_pid
, &t
);
714 x
= strjoina("OBJECT_EXE=", t
);
716 IOVEC_SET_STRING(iovec
[n
++], x
);
719 r
= get_process_cmdline(object_pid
, 0, false, &t
);
721 x
= strjoina("OBJECT_CMDLINE=", t
);
723 IOVEC_SET_STRING(iovec
[n
++], x
);
727 r
= audit_session_from_pid(object_pid
, &audit
);
729 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
730 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
733 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
735 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
736 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
740 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
742 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
743 IOVEC_SET_STRING(iovec
[n
++], x
);
745 r
= cg_path_get_session(c
, &t
);
747 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
749 IOVEC_SET_STRING(iovec
[n
++], x
);
752 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
753 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
754 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
757 if (cg_path_get_unit(c
, &t
) >= 0) {
758 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
760 IOVEC_SET_STRING(iovec
[n
++], x
);
763 if (cg_path_get_user_unit(c
, &t
) >= 0) {
764 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
766 IOVEC_SET_STRING(iovec
[n
++], x
);
775 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
776 IOVEC_SET_STRING(iovec
[n
++], source_time
);
779 /* Note that strictly speaking storing the boot id here is
780 * redundant since the entry includes this in-line
781 * anyway. However, we need this indexed, too. */
782 if (!isempty(s
->boot_id_field
))
783 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
785 if (!isempty(s
->machine_id_field
))
786 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
788 if (!isempty(s
->hostname_field
))
789 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
793 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
794 /* Split up strictly by any UID */
795 journal_uid
= realuid
;
796 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
797 /* Split up by login UIDs. We do this only if the
798 * realuid is not root, in order not to accidentally
799 * leak privileged information to the user that is
800 * logged by a privileged process that is part of an
801 * unprivileged session. */
806 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
809 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
810 char mid
[11 + 32 + 1];
811 char buffer
[16 + LINE_MAX
+ 1];
812 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 6];
815 struct ucred ucred
= {};
820 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
821 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
823 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
824 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
826 memcpy(buffer
, "MESSAGE=", 8);
827 va_start(ap
, format
);
828 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
830 IOVEC_SET_STRING(iovec
[n
++], buffer
);
832 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
833 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
834 IOVEC_SET_STRING(iovec
[n
++], mid
);
837 ucred
.pid
= getpid();
838 ucred
.uid
= getuid();
839 ucred
.gid
= getgid();
841 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
844 void server_dispatch_message(
846 struct iovec
*iovec
, unsigned n
, unsigned m
,
847 const struct ucred
*ucred
,
848 const struct timeval
*tv
,
849 const char *label
, size_t label_len
,
855 _cleanup_free_
char *path
= NULL
;
856 uint64_t available
= 0;
860 assert(iovec
|| n
== 0);
865 if (LOG_PRI(priority
) > s
->max_level_store
)
868 /* Stop early in case the information will not be stored
870 if (s
->storage
== STORAGE_NONE
)
876 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
880 /* example: /user/lennart/3/foobar
881 * /system/dbus.service/foobar
883 * So let's cut off everything past the third /, since that is
884 * where user directories start */
886 c
= strchr(path
, '/');
888 c
= strchr(c
+1, '/');
890 c
= strchr(c
+1, '/');
896 (void) determine_space(s
, false, false, &available
, NULL
);
897 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
901 /* Write a suppression message if we suppressed something */
903 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
904 "Suppressed %u messages from %s", rl
- 1, path
);
907 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
911 static int system_journal_open(Server
*s
, bool flush_requested
) {
915 if (!s
->system_journal
&&
916 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
918 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
920 /* If in auto mode: first try to create the machine
921 * path, but not the prefix.
923 * If in persistent mode: create /var/log/journal and
924 * the machine path */
926 if (s
->storage
== STORAGE_PERSISTENT
)
927 (void) mkdir_p("/var/log/journal/", 0755);
929 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
930 (void) mkdir(fn
, 0755);
932 fn
= strjoina(fn
, "/system.journal");
933 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
935 server_add_acls(s
->system_journal
, 0);
936 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
938 if (r
!= -ENOENT
&& r
!= -EROFS
)
939 log_warning_errno(r
, "Failed to open system journal: %m");
945 if (!s
->runtime_journal
&&
946 (s
->storage
!= STORAGE_NONE
)) {
948 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
950 if (s
->system_journal
) {
952 /* Try to open the runtime journal, but only
953 * if it already exists, so that we can flush
954 * it into the system journal */
956 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
959 log_warning_errno(r
, "Failed to open runtime journal: %m");
966 /* OK, we really need the runtime journal, so create
967 * it if necessary. */
969 (void) mkdir("/run/log", 0755);
970 (void) mkdir("/run/log/journal", 0755);
971 (void) mkdir_parents(fn
, 0750);
973 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
975 return log_error_errno(r
, "Failed to open runtime journal: %m");
978 if (s
->runtime_journal
) {
979 server_add_acls(s
->runtime_journal
, 0);
980 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
987 int server_flush_to_var(Server
*s
) {
989 sd_journal
*j
= NULL
;
990 char ts
[FORMAT_TIMESPAN_MAX
];
997 if (s
->storage
!= STORAGE_AUTO
&&
998 s
->storage
!= STORAGE_PERSISTENT
)
1001 if (!s
->runtime_journal
)
1004 (void) system_journal_open(s
, true);
1006 if (!s
->system_journal
)
1009 log_debug("Flushing to /var...");
1011 start
= now(CLOCK_MONOTONIC
);
1013 r
= sd_id128_get_machine(&machine
);
1017 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1019 return log_error_errno(r
, "Failed to read runtime journal: %m");
1021 sd_journal_set_data_threshold(j
, 0);
1023 SD_JOURNAL_FOREACH(j
) {
1027 f
= j
->current_file
;
1028 assert(f
&& f
->current_offset
> 0);
1032 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1034 log_error_errno(r
, "Can't read entry: %m");
1038 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1042 if (!shall_try_append_again(s
->system_journal
, r
)) {
1043 log_error_errno(r
, "Can't write entry: %m");
1048 server_vacuum(s
, false, false);
1050 if (!s
->system_journal
) {
1051 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1056 log_debug("Retrying write.");
1057 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1059 log_error_errno(r
, "Can't write entry: %m");
1067 journal_file_post_change(s
->system_journal
);
1069 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1072 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1074 sd_journal_close(j
);
1076 server_driver_message(s
, SD_ID128_NULL
, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0), n
);
1081 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1082 Server
*s
= userdata
;
1083 struct ucred
*ucred
= NULL
;
1084 struct timeval
*tv
= NULL
;
1085 struct cmsghdr
*cmsg
;
1087 size_t label_len
= 0, m
;
1090 int *fds
= NULL
, v
= 0;
1094 struct cmsghdr cmsghdr
;
1096 /* We use NAME_MAX space for the SELinux label
1097 * here. The kernel currently enforces no
1098 * limit, but according to suggestions from
1099 * the SELinux people this will change and it
1100 * will probably be identical to NAME_MAX. For
1101 * now we use that, but this should be updated
1102 * one day when the final limit is known. */
1103 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1104 CMSG_SPACE(sizeof(struct timeval
)) +
1105 CMSG_SPACE(sizeof(int)) + /* fd */
1106 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1109 union sockaddr_union sa
= {};
1111 struct msghdr msghdr
= {
1114 .msg_control
= &control
,
1115 .msg_controllen
= sizeof(control
),
1117 .msg_namelen
= sizeof(sa
),
1121 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1123 if (revents
!= EPOLLIN
) {
1124 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1128 /* Try to get the right size, if we can. (Not all
1129 * sockets support SIOCINQ, hence we just try, but
1130 * don't rely on it.) */
1131 (void) ioctl(fd
, SIOCINQ
, &v
);
1133 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1134 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1136 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1138 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1141 iovec
.iov_base
= s
->buffer
;
1142 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1144 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1146 if (errno
== EINTR
|| errno
== EAGAIN
)
1149 return log_error_errno(errno
, "recvmsg() failed: %m");
1152 CMSG_FOREACH(cmsg
, &msghdr
) {
1154 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1155 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1156 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1157 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1158 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1159 cmsg
->cmsg_type
== SCM_SECURITY
) {
1160 label
= (char*) CMSG_DATA(cmsg
);
1161 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1162 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1163 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1164 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1165 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1166 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1167 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1168 fds
= (int*) CMSG_DATA(cmsg
);
1169 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1173 /* And a trailing NUL, just in case */
1176 if (fd
== s
->syslog_fd
) {
1177 if (n
> 0 && n_fds
== 0)
1178 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1180 log_warning("Got file descriptors via syslog socket. Ignoring.");
1182 } else if (fd
== s
->native_fd
) {
1183 if (n
> 0 && n_fds
== 0)
1184 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1185 else if (n
== 0 && n_fds
== 1)
1186 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1188 log_warning("Got too many file descriptors via native socket. Ignoring.");
1191 assert(fd
== s
->audit_fd
);
1193 if (n
> 0 && n_fds
== 0)
1194 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1196 log_warning("Got file descriptors via audit socket. Ignoring.");
1199 close_many(fds
, n_fds
);
1203 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1204 Server
*s
= userdata
;
1209 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1211 server_flush_to_var(s
);
1213 server_vacuum(s
, false, false);
1215 r
= touch("/run/systemd/journal/flushed");
1217 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1222 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1223 Server
*s
= userdata
;
1228 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1230 server_vacuum(s
, true, true);
1232 /* Let clients know when the most recent rotation happened. */
1233 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1235 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1240 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1241 Server
*s
= userdata
;
1245 log_received_signal(LOG_INFO
, si
);
1247 sd_event_exit(s
->event
, 0);
1251 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1252 Server
*s
= userdata
;
1257 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1261 /* Let clients know when the most recent sync happened. */
1262 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1264 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1269 static int setup_signals(Server
*s
) {
1274 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1276 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1280 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1284 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1288 /* Let's process SIGTERM late, so that we flush all queued
1289 * messages to disk before we exit */
1290 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1294 /* When journald is invoked on the terminal (when debugging),
1295 * it's useful if C-c is handled equivalent to SIGTERM. */
1296 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1300 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1304 /* SIGRTMIN+1 causes an immediate sync. We process this very
1305 * late, so that everything else queued at this point is
1306 * really written to disk. Clients can watch
1307 * /run/systemd/journal/synced with inotify until its mtime
1308 * changes to see when a sync happened. */
1309 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1313 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald-related switches from the kernel command line.
 *
 * The command line is read with proc_cmdline() and split into words with
 * extract_first_word(). Words beginning with one of
 *   systemd.journald.forward_to_syslog=
 *   systemd.journald.forward_to_kmsg=
 *   systemd.journald.forward_to_console=
 *   systemd.journald.forward_to_wall=
 * have the text after the equals sign passed to parse_boolean(), and the
 * result is stored in the matching s->forward_to_* field; an unparsable value
 * is reported with log_warning(). Any other word starting with
 * "systemd.journald" only produces an "Invalid systemd.journald parameter"
 * warning.
 *
 * A failure to read /proc/cmdline is logged as a warning ("ignoring"), whereas
 * a word-extraction failure is returned through log_error_errno(). */
1320 static int server_parse_proc_cmdline(Server
*s
) {
1321 _cleanup_free_
char *line
= NULL
;
1325 r
= proc_cmdline(&line
);
1327 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1333 _cleanup_free_
char *word
= NULL
;
1335 r
= extract_first_word(&p
, &word
, NULL
, 0);
1337 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
/* The numeric offsets below (35, 33, 36, 33) are the lengths of the prefixes
 * matched by startswith(), so word + offset points at the value.
 * NOTE(review): these hard-coded lengths must be kept in sync with the string
 * literals by hand. */
1342 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1343 r
= parse_boolean(word
+ 35);
1345 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1347 s
->forward_to_syslog
= r
;
1348 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1349 r
= parse_boolean(word
+ 33);
1351 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1353 s
->forward_to_kmsg
= r
;
1354 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1355 r
= parse_boolean(word
+ 36);
1357 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1359 s
->forward_to_console
= r
;
1360 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1361 r
= parse_boolean(word
+ 33);
1363 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1365 s
->forward_to_wall
= r
;
/* Catch-all for any other word in the systemd.journald namespace. */
1366 } else if (startswith(word
, "systemd.journald"))
1367 log_warning("Invalid systemd.journald parameter. Ignoring.");
1370 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server.
 *
 * Delegates to config_parse_many() with PKGSYSCONFDIR "/journald.conf" as the
 * main file and the "systemd/journald.conf.d" directories (expanded by
 * CONF_PATHS_NULSTR) as the second path argument, using
 * config_item_perf_lookup with the journald_gperf_lookup table. The result of
 * config_parse_many() is returned unchanged. */
1374 static int server_parse_config_file(Server
*s
) {
1377 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1378 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1380 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback of s->sync_event_source; it is the handler passed to
 * sd_event_add_time() in server_schedule_sync(). userdata is the Server that
 * was supplied at registration time. */
1384 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1385 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk.
 *
 * priority is the syslog priority of the message that triggered the request;
 * values <= LOG_CRIT (CRIT, ALERT, EMERG) are handled by the immediate-sync
 * branch. The function also tests s->sync_scheduled before arming anything.
 * When s->sync_interval_usec > 0, a timer is set for the event loop's current
 * CLOCK_MONOTONIC time plus sync_interval_usec: s->sync_event_source is created
 * with server_dispatch_sync() as callback if it does not exist yet and given
 * priority SD_EVENT_PRIORITY_IMPORTANT; the source is also given the new
 * deadline via sd_event_source_set_time() and enabled as SD_EVENT_ONESHOT.
 * Finally s->sync_scheduled is set to true. */
1393 int server_schedule_sync(Server
*s
, int priority
) {
1398 if (priority
<= LOG_CRIT
) {
1399 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1404 if (s
->sync_scheduled
)
1407 if (s
->sync_interval_usec
> 0) {
/* Deadline = event loop timestamp + configured sync interval. */
1410 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1414 when
+= s
->sync_interval_usec
;
/* First use: create the timer source. */
1416 if (!s
->sync_event_source
) {
1417 r
= sd_event_add_time(
1419 &s
->sync_event_source
,
1422 server_dispatch_sync
, s
);
1426 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1428 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1432 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1437 s
->sync_scheduled
= true;
/* I/O callback of s->hostname_event_source (registered in
 * server_open_hostname() on the fd of /proc/sys/kernel/hostname). Refreshes
 * the hostname cached in the Server by calling server_cache_hostname(). */
1443 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1444 Server
*s
= userdata
;
1448 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and watches it for changes.
 *
 * The file is opened read-only (O_CLOEXEC|O_NDELAY|O_NOCTTY) into
 * s->hostname_fd; an open failure is returned through log_error_errno(errno).
 * The fd is then added to the event loop with an event mask of 0 and
 * dispatch_hostname_change() as callback. Two failure paths are visible for
 * the registration: one logs a warning ("ignoring") and closes the fd again
 * with safe_close(), the other returns the error. On success the event source
 * priority is set to SD_EVENT_PRIORITY_IMPORTANT-10. */
1452 static int server_open_hostname(Server
*s
) {
1457 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1458 if (s
->hostname_fd
< 0)
1459 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
/* Event mask 0: no EPOLLIN/EPOLLOUT interest is requested for this fd. */
1461 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1463 /* kernels prior to 3.2 don't support polling this file. Ignore
1466 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1467 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1471 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1474 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1476 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback of s->notify_event_source, which server_connect_notify()
 * registers for EPOLLOUT on s->notify_fd.
 *
 * Each invocation handles one pending item, in this order of precedence:
 *   1. the initial status message, while s->sent_notify_ready is false;
 *   2. the watchdog message, while s->send_watchdog is true;
 *   3. one queued stdout stream notification, via stdout_stream_send_notify().
 * Messages are written with send(..., MSG_DONTWAIT). EAGAIN is tested
 * separately from other send() errors, which are reported through
 * log_error_errno(). After a successful send the corresponding flag is updated
 * (sent_notify_ready = true, send_watchdog = false).
 *
 * The source stays enabled while send_watchdog is set or stream notifications
 * are still queued; otherwise it is switched to SD_EVENT_OFF. */
1481 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1482 Server
*s
= userdata
;
/* Sanity checks: the callback must be invoked for our own source and fd. */
1486 assert(s
->notify_event_source
== es
);
1487 assert(s
->notify_fd
== fd
);
1489 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1490 * message on it. Either it's the watchdog event, the initial
1491 * READY=1 event or an stdout stream event. If there's nothing
1492 * to write anymore, turn our event source off. The next time
1493 * there's something to send it will be turned on again. */
1495 if (!s
->sent_notify_ready
) {
1496 static const char p
[] =
1498 "STATUS=Processing requests...";
/* strlen(p): the terminating NUL of the literal is not transmitted. */
1501 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1503 if (errno
== EAGAIN
)
1506 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1509 s
->sent_notify_ready
= true;
1510 log_debug("Sent READY=1 notification.");
1512 } else if (s
->send_watchdog
) {
1514 static const char p
[] =
1519 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1521 if (errno
== EAGAIN
)
1524 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1527 s
->send_watchdog
= false;
1528 log_debug("Sent WATCHDOG=1 notification.");
1530 } else if (s
->stdout_streams_notify_queue
)
1531 /* Dispatch one stream notification event */
1532 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1534 /* Leave us enabled if there's still more to do. */
1535 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1538 /* There was nothing to do anymore, let's turn ourselves off. */
1539 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1541 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of s->watchdog_event_source.
 *
 * Marks a watchdog message as due (s->send_watchdog = true) and switches
 * s->notify_event_source on, so that dispatch_notify_event() gets to send it.
 * A failure to enable the notify source is only logged as a warning. The timer
 * is then re-armed for usec + s->watchdog_usec / 2, where usec is the time
 * value passed to this callback, and enabled again; failures of these two
 * steps are returned through log_error_errno(). */
1546 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1547 Server
*s
= userdata
;
1552 s
->send_watchdog
= true;
1554 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1556 log_warning_errno(r
, "Failed to turn on notify event source: %m");
/* Next expiry: half a watchdog interval after this one. */
1558 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1560 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1562 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1564 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects the Server to the service manager notification socket named by
 * $NOTIFY_SOCKET, using a non-blocking datagram socket.
 *
 * Preconditions (asserted): s->notify_fd < 0 and no notify event source yet.
 *
 * Steps visible below:
 *   - $NOTIFY_SOCKET is validated: it must start with @ or / and be at least
 *     two characters long, and must not be longer than sun_path.
 *   - An AF_UNIX SOCK_DGRAM socket is created with SOCK_CLOEXEC|SOCK_NONBLOCK,
 *     its send buffer is enlarged to NOTIFY_SNDBUF_SIZE (best effort), and it
 *     is connected to the address. A leading @ is replaced by a NUL byte
 *     before connecting.
 *   - The fd is registered for EPOLLOUT with dispatch_notify_event().
 *   - If sd_watchdog_enabled() reports an active watchdog, s->send_watchdog is
 *     set and a CLOCK_MONOTONIC timer (dispatch_watchdog) is added, first
 *     firing at now + watchdog_usec/2 with an accuracy of watchdog_usec/4.
 *
 * The prose following the assertions explains why sd_notify() is not used. */
1569 static int server_connect_notify(Server
*s
) {
1570 union sockaddr_union sa
= {
1571 .un
.sun_family
= AF_UNIX
,
1577 assert(s
->notify_fd
< 0);
1578 assert(!s
->notify_event_source
);
1581 So here's the problem: we'd like to send notification
1582 messages to PID 1, but we cannot do that via sd_notify(),
1583 since that's synchronous, and we might end up blocking on
1584 it. Specifically: given that PID 1 might block on
1585 dbus-daemon during IPC, and dbus-daemon is logging to us,
1586 and might hence block on us, we might end up in a deadlock
1587 if we block on sending PID 1 notification messages -- by
1588 generating a full blocking circle. To avoid this, let's
1589 create a non-blocking socket, and connect it to the
1590 notification socket, and then wait for POLLOUT before we
1591 send anything. This should efficiently avoid any deadlocks,
1592 as we'll never block on PID 1, hence PID 1 can safely block
1593 on dbus-daemon which can safely block on us again.
1595 Don't think that this issue is real? It is, see:
1596 https://github.com/systemd/systemd/issues/1505
1599 e
= getenv("NOTIFY_SOCKET");
1603 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1604 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
/* A path of exactly sizeof(sun_path) bytes is still accepted here. */
1608 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1609 log_error("NOTIFY_SOCKET path too long: %s", e
);
1613 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1614 if (s
->notify_fd
< 0)
1615 return log_error_errno(errno
, "Failed to create notify socket: %m");
1617 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
/* NOTE(review): strncpy() leaves sun_path without a terminating NUL when
 * strlen(e) equals sizeof(sun_path). The connect() call below passes an
 * explicit address length computed from strlen(e), so it does not rely on the
 * terminator -- confirm no later code treats sun_path as a C string. */
1619 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1620 if (sa
.un
.sun_path
[0] == '@')
1621 sa
.un
.sun_path
[0] = 0;
1623 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1625 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1627 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1629 return log_error_errno(r
, "Failed to watch notification socket: %m");
1631 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1632 s
->send_watchdog
= true;
1634 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1636 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1639 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server and brings it into a state where it can process log
 * traffic. The function's final result is that of system_journal_open().
 *
 * Phases visible below:
 *   1. Defaults: all fds are set to -1, and the sync interval, rate limit,
 *      forwarding, file age and max-level settings are given built-in defaults.
 *   2. Configuration: journald.conf and the kernel command line are parsed;
 *      if exactly one of rate_limit_interval / rate_limit_burst is zero, both
 *      are forced to zero.
 *   3. Runtime objects: /run/systemd/journal is created (best effort), then
 *      the user journal hashmap, the mmap cache and the default event loop.
 *   4. Socket activation: every fd reported by sd_listen_fds() is matched
 *      against the native, stdout, syslog and audit socket descriptions;
 *      receiving a second socket of the same kind logs an error. fdset_put()
 *      is called on the fds set, which is then offered to
 *      server_restore_streams(); whatever remains in the set is reported as
 *      unknown and the set is freed.
 *   5. Remaining inputs are opened (stdout, syslog, native, /dev/kmsg, audit
 *      when appropriate, kernel seqnum, hostname), signals are set up, and the
 *      udev context, rate limiter, cgroup root and cached ids are prepared.
 *   6. The notification socket is connected (failure ignored) and the system
 *      journal is opened. */
1645 int server_init(Server
*s
) {
1646 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* -1 marks every descriptor as not open. */
1653 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1657 s
->watchdog_usec
= USEC_INFINITY
;
1659 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1660 s
->sync_scheduled
= false;
1662 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1663 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1665 s
->forward_to_wall
= true;
1667 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1669 s
->max_level_store
= LOG_DEBUG
;
1670 s
->max_level_syslog
= LOG_DEBUG
;
1671 s
->max_level_kmsg
= LOG_NOTICE
;
1672 s
->max_level_console
= LOG_INFO
;
1673 s
->max_level_wall
= LOG_EMERG
;
1675 journal_reset_metrics(&s
->system_metrics
);
1676 journal_reset_metrics(&s
->runtime_metrics
);
/* The command line is parsed after the config file, so its assignments are
 * applied last.
 * NOTE(review): the results of both parser calls are not captured here. */
1678 server_parse_config_file(s
);
1679 server_parse_proc_cmdline(s
);
/* True when exactly one of the two rate limit settings is zero. */
1681 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1682 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1683 s
->rate_limit_interval
, s
->rate_limit_burst
);
1684 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1687 (void) mkdir_p("/run/systemd/journal", 0755);
1689 s
->user_journals
= ordered_hashmap_new(NULL
);
1690 if (!s
->user_journals
)
1693 s
->mmap
= mmap_cache_new();
1697 r
= sd_event_default(&s
->event
);
1699 return log_error_errno(r
, "Failed to create event loop: %m");
/* n is the number of fds handed over by the service manager. */
1701 n
= sd_listen_fds(true);
1703 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed fd by socket family, type and bound path. */
1705 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1707 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1709 if (s
->native_fd
>= 0) {
1710 log_error("Too many native sockets passed.");
1716 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1718 if (s
->stdout_fd
>= 0) {
1719 log_error("Too many stdout sockets passed.");
/* The syslog socket is recognized under either of two paths. */
1725 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1726 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1728 if (s
->syslog_fd
>= 0) {
1729 log_error("Too many /dev/log sockets passed.");
1735 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1737 if (s
->audit_fd
>= 0) {
1738 log_error("Too many audit sockets passed.");
1752 r
= fdset_put(fds
, fd
);
1758 /* Try to restore streams, but don't bother if this fails */
1759 (void) server_restore_streams(s
, fds
);
1761 if (fdset_size(fds
) > 0) {
1762 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1763 fds
= fdset_free(fds
);
/* Remember whether none of the four socket kinds was passed in. */
1766 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1768 /* always open stdout, syslog, native, and kmsg sockets */
1770 /* systemd-journald.socket: /run/systemd/journal/stdout */
1771 r
= server_open_stdout_socket(s
);
1775 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1776 r
= server_open_syslog_socket(s
);
1780 /* systemd-journald.socket: /run/systemd/journal/socket */
1781 r
= server_open_native_socket(s
);
1786 r
= server_open_dev_kmsg(s
);
1790 /* Unless we got *some* sockets and not audit, open audit socket */
1791 if (s
->audit_fd
>= 0 || no_sockets
) {
1792 r
= server_open_audit(s
);
1797 r
= server_open_kernel_seqnum(s
);
1801 r
= server_open_hostname(s
);
1805 r
= setup_signals(s
);
1809 s
->udev
= udev_new();
1813 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1817 r
= cg_get_root_path(&s
->cgroup_root
);
1821 server_cache_hostname(s
);
1822 server_cache_boot_id(s
);
1823 server_cache_machine_id(s
);
/* The result is deliberately discarded via the (void) cast. */
1825 (void) server_connect_notify(s
);
1827 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() on every open journal.
 *
 * A single CLOCK_REALTIME timestamp is taken and passed to
 * journal_file_maybe_append_tag() for s->system_journal (if set) and for each
 * file stored in s->user_journals. */
1830 void server_maybe_append_tags(Server
*s
) {
/* One timestamp shared by all files handled in this call. */
1836 n
= now(CLOCK_REALTIME
);
1838 if (s
->system_journal
)
1839 journal_file_maybe_append_tag(s
->system_journal
, n
);
1841 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1842 journal_file_maybe_append_tag(f
, n
);
/* Releases the resources held by a Server.
 *
 * In order: all stdout streams are freed, the system, runtime and per-user
 * journal files are closed, the user journal hashmap is freed, every event
 * source and then the event loop itself are unreferenced, all file descriptors
 * are closed with safe_close(), the rate limiter is freed, the kernel_seqnum
 * mapping is unmapped, the cgroup_root and hostname_field strings are freed,
 * and finally the mmap cache and udev context are unreferenced. */
1846 void server_done(Server
*s
) {
/* Loops until the stream list head is empty. */
1850 while (s
->stdout_streams
)
1851 stdout_stream_free(s
->stdout_streams
);
1853 if (s
->system_journal
)
1854 journal_file_close(s
->system_journal
);
1856 if (s
->runtime_journal
)
1857 journal_file_close(s
->runtime_journal
);
/* Entries are removed from the hashmap one at a time and closed, so the
 * hashmap is empty by the time it is freed below. */
1859 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1860 journal_file_close(f
);
1862 ordered_hashmap_free(s
->user_journals
);
1864 sd_event_source_unref(s
->syslog_event_source
);
1865 sd_event_source_unref(s
->native_event_source
);
1866 sd_event_source_unref(s
->stdout_event_source
);
1867 sd_event_source_unref(s
->dev_kmsg_event_source
);
1868 sd_event_source_unref(s
->audit_event_source
);
1869 sd_event_source_unref(s
->sync_event_source
);
1870 sd_event_source_unref(s
->sigusr1_event_source
);
1871 sd_event_source_unref(s
->sigusr2_event_source
);
1872 sd_event_source_unref(s
->sigterm_event_source
);
1873 sd_event_source_unref(s
->sigint_event_source
);
1874 sd_event_source_unref(s
->sigrtmin1_event_source
);
1875 sd_event_source_unref(s
->hostname_event_source
);
1876 sd_event_source_unref(s
->notify_event_source
);
1877 sd_event_source_unref(s
->watchdog_event_source
);
1878 sd_event_unref(s
->event
);
1880 safe_close(s
->syslog_fd
);
1881 safe_close(s
->native_fd
);
1882 safe_close(s
->stdout_fd
);
1883 safe_close(s
->dev_kmsg_fd
);
1884 safe_close(s
->audit_fd
);
1885 safe_close(s
->hostname_fd
);
1886 safe_close(s
->notify_fd
);
1889 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with a length of sizeof(uint64_t). */
1891 if (s
->kernel_seqnum
)
1892 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1896 free(s
->cgroup_root
);
1897 free(s
->hostname_field
);
1900 mmap_cache_unref(s
->mmap
);
1902 udev_unref(s
->udev
);
/* String names of the Storage enum values, indexed by enum value. The two
 * macro invocations after the table generate the string lookup helpers for
 * Storage and the config_parse_storage() parser, which reports
 * "Failed to parse storage setting" on bad input. */
1905 static const char* const storage_table
[_STORAGE_MAX
] = {
1906 [STORAGE_AUTO
] = "auto",
1907 [STORAGE_VOLATILE
] = "volatile",
1908 [STORAGE_PERSISTENT
] = "persistent",
1909 [STORAGE_NONE
] = "none"
1912 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1913 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values, indexed by enum value. The two
 * macro invocations after the table generate the string lookup helpers for
 * SplitMode and the config_parse_split_mode() parser, which reports
 * "Failed to parse split mode setting" on bad input. */
1915 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1916 [SPLIT_LOGIN
] = "login",
1917 [SPLIT_UID
] = "uid",
1918 [SPLIT_NONE
] = "none",
1921 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1922 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");