1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <selinux/selinux.h>
25 #include <sys/ioctl.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
45 #include "formats-util.h"
48 #include "hostname-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
74 #define USER_JOURNALS_MAX 1024
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85 static int determine_space_for(
87 JournalMetrics
*metrics
,
95 uint64_t sum
= 0, ss_avail
, avail
;
96 _cleanup_closedir_
DIR *d
= NULL
;
107 ts
= now(CLOCK_MONOTONIC
);
109 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
112 *available
= s
->cached_space_available
;
114 *limit
= s
->cached_space_limit
;
119 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
122 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
124 if (fstatvfs(dirfd(d
), &ss
) < 0)
125 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
127 FOREACH_DIRENT_ALL(de
, d
, break) {
130 if (!endswith(de
->d_name
, ".journal") &&
131 !endswith(de
->d_name
, ".journal~"))
134 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
135 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
139 if (!S_ISREG(st
.st_mode
))
142 sum
+= (uint64_t) st
.st_blocks
* 512UL;
145 /* If requested, then let's bump the min_use limit to the
146 * current usage on disk. We do this when starting up and
147 * first opening the journal files. This way sudden spikes in
148 * disk usage will not cause journald to vacuum files without
149 * bounds. Note that this means that only a restart of
150 * journald will make it reset this value. */
153 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
155 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
156 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
158 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
159 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
160 s
->cached_space_timestamp
= ts
;
163 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
164 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
166 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
167 "%s (%s) is currently using %s.\n"
168 "Maximum allowed usage is set to %s.\n"
169 "Leaving at least %s free (of currently available %s of space).\n"
170 "Enforced usage limit is thus %s, of which %s are still available.",
172 format_bytes(fb1
, sizeof(fb1
), sum
),
173 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
),
174 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
),
175 format_bytes(fb4
, sizeof(fb4
), ss_avail
),
176 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
),
177 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
));
181 *available
= s
->cached_space_available
;
183 *limit
= s
->cached_space_limit
;
188 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
189 JournalMetrics
*metrics
;
190 const char *path
, *name
;
194 if (s
->system_journal
) {
195 path
= "/var/log/journal/";
196 metrics
= &s
->system_metrics
;
197 name
= "System journal";
199 path
= "/run/log/journal/";
200 metrics
= &s
->runtime_metrics
;
201 name
= "Runtime journal";
204 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
207 void server_fix_perms(Server
*s
, JournalFile
*f
, uid_t uid
) {
210 _cleanup_(acl_freep
) acl_t acl
= NULL
;
212 acl_permset_t permset
;
217 r
= fchmod(f
->fd
, 0640);
219 log_warning_errno(errno
, "Failed to fix access mode on %s, ignoring: %m", f
->path
);
222 if (uid
<= SYSTEM_UID_MAX
)
225 acl
= acl_get_fd(f
->fd
);
227 log_warning_errno(errno
, "Failed to read ACL on %s, ignoring: %m", f
->path
);
231 r
= acl_find_uid(acl
, uid
, &entry
);
234 if (acl_create_entry(&acl
, &entry
) < 0 ||
235 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
236 acl_set_qualifier(entry
, &uid
) < 0) {
237 log_warning_errno(errno
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
242 /* We do not recalculate the mask unconditionally here,
243 * so that the fchmod() mask above stays intact. */
244 if (acl_get_permset(entry
, &permset
) < 0 ||
245 acl_add_perm(permset
, ACL_READ
) < 0) {
246 log_warning_errno(errno
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
250 r
= calc_acl_mask_if_needed(&acl
);
252 log_warning_errno(r
, "Failed to patch ACL on %s, ignoring: %m", f
->path
);
256 if (acl_set_fd(f
->fd
, acl
) < 0)
257 log_warning_errno(errno
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
262 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
263 _cleanup_free_
char *p
= NULL
;
270 /* We split up user logs only on /var, not on /run. If the
271 * runtime file is open, we write to it exclusively, in order
272 * to guarantee proper order as soon as we flush /run to
273 * /var and close the runtime file. */
275 if (s
->runtime_journal
)
276 return s
->runtime_journal
;
278 if (uid
<= SYSTEM_UID_MAX
)
279 return s
->system_journal
;
281 r
= sd_id128_get_machine(&machine
);
283 return s
->system_journal
;
285 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
289 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
290 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
291 return s
->system_journal
;
293 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
294 /* Too many open? Then let's close one */
295 f
= ordered_hashmap_steal_first(s
->user_journals
);
297 journal_file_close(f
);
300 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &f
);
302 return s
->system_journal
;
304 server_fix_perms(s
, f
, uid
);
306 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
308 journal_file_close(f
);
309 return s
->system_journal
;
315 static int do_rotate(
328 r
= journal_file_rotate(f
, s
->compress
, seal
);
331 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
333 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
335 server_fix_perms(s
, *f
, uid
);
340 void server_rotate(Server
*s
) {
346 log_debug("Rotating...");
348 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
349 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
351 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
352 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
354 ordered_hashmap_replace(s
->user_journals
, k
, f
);
356 /* Old file has been closed and deallocated */
357 ordered_hashmap_remove(s
->user_journals
, k
);
361 void server_sync(Server
*s
) {
366 if (s
->system_journal
) {
367 r
= journal_file_set_offline(s
->system_journal
);
369 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
372 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
373 r
= journal_file_set_offline(f
);
375 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
378 if (s
->sync_event_source
) {
379 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
381 log_error_errno(r
, "Failed to disable sync timer source: %m");
384 s
->sync_scheduled
= false;
387 static void do_vacuum(
390 JournalMetrics
*metrics
,
394 bool patch_min_use
) {
408 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
410 limit
= metrics
->max_use
;
411 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
413 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
414 if (r
< 0 && r
!= -ENOENT
)
415 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
418 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
421 log_debug("Vacuuming...");
423 s
->oldest_file_usec
= 0;
425 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
426 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
428 s
->cached_space_limit
= 0;
429 s
->cached_space_available
= 0;
430 s
->cached_space_timestamp
= 0;
435 static void server_cache_machine_id(Server
*s
) {
441 r
= sd_id128_get_machine(&id
);
445 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
448 static void server_cache_boot_id(Server
*s
) {
454 r
= sd_id128_get_boot(&id
);
458 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
461 static void server_cache_hostname(Server
*s
) {
462 _cleanup_free_
char *t
= NULL
;
467 t
= gethostname_malloc();
471 x
= strappend("_HOSTNAME=", t
);
475 free(s
->hostname_field
);
476 s
->hostname_field
= x
;
479 static bool shall_try_append_again(JournalFile
*f
, int r
) {
481 /* -E2BIG Hit configured limit
483 -EDQUOT Quota limit hit
485 -EIO I/O error of some kind (mmap)
486 -EHOSTDOWN Other machine
487 -EBUSY Unclean shutdown
488 -EPROTONOSUPPORT Unsupported feature
491 -ESHUTDOWN Already archived
492 -EIDRM Journal file has been deleted */
494 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
495 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
496 else if (r
== -EHOSTDOWN
)
497 log_info("%s: Journal file from other machine, rotating.", f
->path
);
498 else if (r
== -EBUSY
)
499 log_info("%s: Unclean shutdown, rotating.", f
->path
);
500 else if (r
== -EPROTONOSUPPORT
)
501 log_info("%s: Unsupported feature, rotating.", f
->path
);
502 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
503 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
505 log_warning("%s: IO error, rotating.", f
->path
);
506 else if (r
== -EIDRM
)
507 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
514 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
516 bool vacuumed
= false;
523 f
= find_journal(s
, uid
);
527 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
528 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
530 server_vacuum(s
, false, false);
533 f
= find_journal(s
, uid
);
538 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
540 server_schedule_sync(s
, priority
);
544 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
545 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
550 server_vacuum(s
, false, false);
552 f
= find_journal(s
, uid
);
556 log_debug("Retrying write.");
557 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
559 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
561 server_schedule_sync(s
, priority
);
564 static void dispatch_message_real(
566 struct iovec
*iovec
, unsigned n
, unsigned m
,
567 const struct ucred
*ucred
,
568 const struct timeval
*tv
,
569 const char *label
, size_t label_len
,
574 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
575 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
576 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
577 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
578 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
579 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
580 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
581 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
587 uid_t realuid
= 0, owner
= 0, journal_uid
;
588 bool owner_valid
= false;
590 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
591 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
592 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
593 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
602 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
605 realuid
= ucred
->uid
;
607 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
608 IOVEC_SET_STRING(iovec
[n
++], pid
);
610 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
611 IOVEC_SET_STRING(iovec
[n
++], uid
);
613 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
614 IOVEC_SET_STRING(iovec
[n
++], gid
);
616 r
= get_process_comm(ucred
->pid
, &t
);
618 x
= strjoina("_COMM=", t
);
620 IOVEC_SET_STRING(iovec
[n
++], x
);
623 r
= get_process_exe(ucred
->pid
, &t
);
625 x
= strjoina("_EXE=", t
);
627 IOVEC_SET_STRING(iovec
[n
++], x
);
630 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
632 x
= strjoina("_CMDLINE=", t
);
634 IOVEC_SET_STRING(iovec
[n
++], x
);
637 r
= get_process_capeff(ucred
->pid
, &t
);
639 x
= strjoina("_CAP_EFFECTIVE=", t
);
641 IOVEC_SET_STRING(iovec
[n
++], x
);
645 r
= audit_session_from_pid(ucred
->pid
, &audit
);
647 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
648 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
651 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
653 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
654 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
658 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
660 char *session
= NULL
;
662 x
= strjoina("_SYSTEMD_CGROUP=", c
);
663 IOVEC_SET_STRING(iovec
[n
++], x
);
665 r
= cg_path_get_session(c
, &t
);
667 session
= strjoina("_SYSTEMD_SESSION=", t
);
669 IOVEC_SET_STRING(iovec
[n
++], session
);
672 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
675 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
676 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
679 if (cg_path_get_unit(c
, &t
) >= 0) {
680 x
= strjoina("_SYSTEMD_UNIT=", t
);
682 IOVEC_SET_STRING(iovec
[n
++], x
);
683 } else if (unit_id
&& !session
) {
684 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
685 IOVEC_SET_STRING(iovec
[n
++], x
);
688 if (cg_path_get_user_unit(c
, &t
) >= 0) {
689 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
691 IOVEC_SET_STRING(iovec
[n
++], x
);
692 } else if (unit_id
&& session
) {
693 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
694 IOVEC_SET_STRING(iovec
[n
++], x
);
697 if (cg_path_get_slice(c
, &t
) >= 0) {
698 x
= strjoina("_SYSTEMD_SLICE=", t
);
700 IOVEC_SET_STRING(iovec
[n
++], x
);
704 } else if (unit_id
) {
705 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
706 IOVEC_SET_STRING(iovec
[n
++], x
);
710 if (mac_selinux_use()) {
712 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
714 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
715 IOVEC_SET_STRING(iovec
[n
++], x
);
717 security_context_t con
;
719 if (getpidcon(ucred
->pid
, &con
) >= 0) {
720 x
= strjoina("_SELINUX_CONTEXT=", con
);
723 IOVEC_SET_STRING(iovec
[n
++], x
);
732 r
= get_process_uid(object_pid
, &object_uid
);
734 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
735 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
738 r
= get_process_gid(object_pid
, &object_gid
);
740 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
741 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
744 r
= get_process_comm(object_pid
, &t
);
746 x
= strjoina("OBJECT_COMM=", t
);
748 IOVEC_SET_STRING(iovec
[n
++], x
);
751 r
= get_process_exe(object_pid
, &t
);
753 x
= strjoina("OBJECT_EXE=", t
);
755 IOVEC_SET_STRING(iovec
[n
++], x
);
758 r
= get_process_cmdline(object_pid
, 0, false, &t
);
760 x
= strjoina("OBJECT_CMDLINE=", t
);
762 IOVEC_SET_STRING(iovec
[n
++], x
);
766 r
= audit_session_from_pid(object_pid
, &audit
);
768 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
769 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
772 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
774 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
775 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
779 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
781 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
782 IOVEC_SET_STRING(iovec
[n
++], x
);
784 r
= cg_path_get_session(c
, &t
);
786 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
788 IOVEC_SET_STRING(iovec
[n
++], x
);
791 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
792 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
793 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
796 if (cg_path_get_unit(c
, &t
) >= 0) {
797 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
799 IOVEC_SET_STRING(iovec
[n
++], x
);
802 if (cg_path_get_user_unit(c
, &t
) >= 0) {
803 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
805 IOVEC_SET_STRING(iovec
[n
++], x
);
814 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
815 IOVEC_SET_STRING(iovec
[n
++], source_time
);
818 /* Note that strictly speaking storing the boot id here is
819 * redundant since the entry includes this in-line
820 * anyway. However, we need this indexed, too. */
821 if (!isempty(s
->boot_id_field
))
822 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
824 if (!isempty(s
->machine_id_field
))
825 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
827 if (!isempty(s
->hostname_field
))
828 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
832 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
833 /* Split up strictly by any UID */
834 journal_uid
= realuid
;
835 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
836 /* Split up by login UIDs. We do this only if the
837 * realuid is not root, in order not to accidentally
838 * leak privileged information to the user that is
839 * logged by a privileged process that is part of an
840 * unprivileged session. */
845 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
848 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
849 char mid
[11 + 32 + 1];
850 char buffer
[16 + LINE_MAX
+ 1];
851 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 6];
854 struct ucred ucred
= {};
859 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
860 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
862 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
863 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
865 memcpy(buffer
, "MESSAGE=", 8);
866 va_start(ap
, format
);
867 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
869 IOVEC_SET_STRING(iovec
[n
++], buffer
);
871 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
872 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
873 IOVEC_SET_STRING(iovec
[n
++], mid
);
876 ucred
.pid
= getpid();
877 ucred
.uid
= getuid();
878 ucred
.gid
= getgid();
880 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
883 void server_dispatch_message(
885 struct iovec
*iovec
, unsigned n
, unsigned m
,
886 const struct ucred
*ucred
,
887 const struct timeval
*tv
,
888 const char *label
, size_t label_len
,
894 _cleanup_free_
char *path
= NULL
;
895 uint64_t available
= 0;
899 assert(iovec
|| n
== 0);
904 if (LOG_PRI(priority
) > s
->max_level_store
)
907 /* Stop early in case the information will not be stored
909 if (s
->storage
== STORAGE_NONE
)
915 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
919 /* example: /user/lennart/3/foobar
920 * /system/dbus.service/foobar
922 * So let's cut off everything past the third /, since that is
923 * where user directories start */
925 c
= strchr(path
, '/');
927 c
= strchr(c
+1, '/');
929 c
= strchr(c
+1, '/');
935 (void) determine_space(s
, false, false, &available
, NULL
);
936 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
940 /* Write a suppression message if we suppressed something */
942 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
943 "Suppressed %u messages from %s", rl
- 1, path
);
946 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
950 static int system_journal_open(Server
*s
, bool flush_requested
) {
954 if (!s
->system_journal
&&
955 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
957 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
959 /* If in auto mode: first try to create the machine
960 * path, but not the prefix.
962 * If in persistent mode: create /var/log/journal and
963 * the machine path */
965 if (s
->storage
== STORAGE_PERSISTENT
)
966 (void) mkdir_p("/var/log/journal/", 0755);
968 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
969 (void) mkdir(fn
, 0755);
971 fn
= strjoina(fn
, "/system.journal");
972 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
974 server_fix_perms(s
, s
->system_journal
, 0);
975 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
977 if (r
!= -ENOENT
&& r
!= -EROFS
)
978 log_warning_errno(r
, "Failed to open system journal: %m");
984 if (!s
->runtime_journal
&&
985 (s
->storage
!= STORAGE_NONE
)) {
987 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
989 if (s
->system_journal
) {
991 /* Try to open the runtime journal, but only
992 * if it already exists, so that we can flush
993 * it into the system journal */
995 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
998 log_warning_errno(r
, "Failed to open runtime journal: %m");
1005 /* OK, we really need the runtime journal, so create
1006 * it if necessary. */
1008 (void) mkdir("/run/log", 0755);
1009 (void) mkdir("/run/log/journal", 0755);
1010 (void) mkdir_parents(fn
, 0750);
1012 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
1014 return log_error_errno(r
, "Failed to open runtime journal: %m");
1017 if (s
->runtime_journal
) {
1018 server_fix_perms(s
, s
->runtime_journal
, 0);
1019 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1026 int server_flush_to_var(Server
*s
) {
1028 sd_journal
*j
= NULL
;
1029 char ts
[FORMAT_TIMESPAN_MAX
];
1036 if (s
->storage
!= STORAGE_AUTO
&&
1037 s
->storage
!= STORAGE_PERSISTENT
)
1040 if (!s
->runtime_journal
)
1043 (void) system_journal_open(s
, true);
1045 if (!s
->system_journal
)
1048 log_debug("Flushing to /var...");
1050 start
= now(CLOCK_MONOTONIC
);
1052 r
= sd_id128_get_machine(&machine
);
1056 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1058 return log_error_errno(r
, "Failed to read runtime journal: %m");
1060 sd_journal_set_data_threshold(j
, 0);
1062 SD_JOURNAL_FOREACH(j
) {
1066 f
= j
->current_file
;
1067 assert(f
&& f
->current_offset
> 0);
1071 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1073 log_error_errno(r
, "Can't read entry: %m");
1077 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1081 if (!shall_try_append_again(s
->system_journal
, r
)) {
1082 log_error_errno(r
, "Can't write entry: %m");
1087 server_vacuum(s
, false, false);
1089 if (!s
->system_journal
) {
1090 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1095 log_debug("Retrying write.");
1096 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1098 log_error_errno(r
, "Can't write entry: %m");
1106 journal_file_post_change(s
->system_journal
);
1108 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1111 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1113 sd_journal_close(j
);
1115 server_driver_message(s
, SD_ID128_NULL
, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0), n
);
1120 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1121 Server
*s
= userdata
;
1122 struct ucred
*ucred
= NULL
;
1123 struct timeval
*tv
= NULL
;
1124 struct cmsghdr
*cmsg
;
1126 size_t label_len
= 0, m
;
1129 int *fds
= NULL
, v
= 0;
1133 struct cmsghdr cmsghdr
;
1135 /* We use NAME_MAX space for the SELinux label
1136 * here. The kernel currently enforces no
1137 * limit, but according to suggestions from
1138 * the SELinux people this will change and it
1139 * will probably be identical to NAME_MAX. For
1140 * now we use that, but this should be updated
1141 * one day when the final limit is known. */
1142 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1143 CMSG_SPACE(sizeof(struct timeval
)) +
1144 CMSG_SPACE(sizeof(int)) + /* fd */
1145 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1148 union sockaddr_union sa
= {};
1150 struct msghdr msghdr
= {
1153 .msg_control
= &control
,
1154 .msg_controllen
= sizeof(control
),
1156 .msg_namelen
= sizeof(sa
),
1160 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1162 if (revents
!= EPOLLIN
) {
1163 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1167 /* Try to get the right size, if we can. (Not all
1168 * sockets support SIOCINQ, hence we just try, but
1169 * don't rely on it.) */
1170 (void) ioctl(fd
, SIOCINQ
, &v
);
1172 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1173 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1175 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1177 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1180 iovec
.iov_base
= s
->buffer
;
1181 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1183 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1185 if (errno
== EINTR
|| errno
== EAGAIN
)
1188 return log_error_errno(errno
, "recvmsg() failed: %m");
1191 CMSG_FOREACH(cmsg
, &msghdr
) {
1193 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1194 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1195 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1196 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1197 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1198 cmsg
->cmsg_type
== SCM_SECURITY
) {
1199 label
= (char*) CMSG_DATA(cmsg
);
1200 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1201 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1202 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1203 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1204 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1205 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1206 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1207 fds
= (int*) CMSG_DATA(cmsg
);
1208 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1212 /* And a trailing NUL, just in case */
1215 if (fd
== s
->syslog_fd
) {
1216 if (n
> 0 && n_fds
== 0)
1217 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1219 log_warning("Got file descriptors via syslog socket. Ignoring.");
1221 } else if (fd
== s
->native_fd
) {
1222 if (n
> 0 && n_fds
== 0)
1223 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1224 else if (n
== 0 && n_fds
== 1)
1225 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1227 log_warning("Got too many file descriptors via native socket. Ignoring.");
1230 assert(fd
== s
->audit_fd
);
1232 if (n
> 0 && n_fds
== 0)
1233 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1235 log_warning("Got file descriptors via audit socket. Ignoring.");
1238 close_many(fds
, n_fds
);
1242 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1243 Server
*s
= userdata
;
1248 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1250 server_flush_to_var(s
);
1252 server_vacuum(s
, false, false);
1254 r
= touch("/run/systemd/journal/flushed");
1256 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1261 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1262 Server
*s
= userdata
;
1267 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1269 server_vacuum(s
, true, true);
1271 /* Let clients know when the most recent rotation happened. */
1272 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1274 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1279 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1280 Server
*s
= userdata
;
1284 log_received_signal(LOG_INFO
, si
);
1286 sd_event_exit(s
->event
, 0);
1290 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1291 Server
*s
= userdata
;
1296 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1300 /* Let clients know when the most recent sync happened. */
1301 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1303 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1308 static int setup_signals(Server
*s
) {
1313 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1315 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1319 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1323 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1327 /* Let's process SIGTERM late, so that we flush all queued
1328 * messages to disk before we exit */
1329 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1333 /* When journald is invoked on the terminal (when debugging),
1334 * it's useful if C-c is handled equivalent to SIGTERM. */
1335 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1339 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1343 /* SIGRTMIN+1 causes an immediate sync. We process this very
1344 * late, so that everything else queued at this point is
1345 * really written to disk. Clients can watch
1346 * /run/systemd/journal/synced with inotify until its mtime
1347 * changes to see when a sync happened. */
1348 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1352 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1359 static int server_parse_proc_cmdline(Server
*s
) {
1360 _cleanup_free_
char *line
= NULL
;
1364 r
= proc_cmdline(&line
);
1366 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1372 _cleanup_free_
char *word
;
1374 r
= extract_first_word(&p
, &word
, NULL
, 0);
1376 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1381 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1382 r
= parse_boolean(word
+ 35);
1384 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1386 s
->forward_to_syslog
= r
;
1387 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1388 r
= parse_boolean(word
+ 33);
1390 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1392 s
->forward_to_kmsg
= r
;
1393 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1394 r
= parse_boolean(word
+ 36);
1396 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1398 s
->forward_to_console
= r
;
1399 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1400 r
= parse_boolean(word
+ 33);
1402 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1404 s
->forward_to_wall
= r
;
1405 } else if (startswith(word
, "systemd.journald"))
1406 log_warning("Invalid systemd.journald parameter. Ignoring.");
1409 /* do not warn about state here, since probably systemd already did */
1413 static int server_parse_config_file(Server
*s
) {
1416 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1417 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1419 config_item_perf_lookup
, journald_gperf_lookup
,
1423 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1424 Server
*s
= userdata
;
1432 int server_schedule_sync(Server
*s
, int priority
) {
1437 if (priority
<= LOG_CRIT
) {
1438 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1443 if (s
->sync_scheduled
)
1446 if (s
->sync_interval_usec
> 0) {
1449 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1453 when
+= s
->sync_interval_usec
;
1455 if (!s
->sync_event_source
) {
1456 r
= sd_event_add_time(
1458 &s
->sync_event_source
,
1461 server_dispatch_sync
, s
);
1465 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1467 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1471 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1476 s
->sync_scheduled
= true;
1482 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1483 Server
*s
= userdata
;
1487 server_cache_hostname(s
);
1491 static int server_open_hostname(Server
*s
) {
1496 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1497 if (s
->hostname_fd
< 0)
1498 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1500 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1502 /* kernels prior to 3.2 don't support polling this file. Ignore
1505 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1506 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1510 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1513 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1515 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1520 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1521 Server
*s
= userdata
;
1525 assert(s
->notify_event_source
== es
);
1526 assert(s
->notify_fd
== fd
);
1528 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1529 * message on it. Either it's the wtachdog event, the initial
1530 * READY=1 event or an stdout stream event. If there's nothing
1531 * to write anymore, turn our event source off. The next time
1532 * there's something to send it will be turned on again. */
1534 if (!s
->sent_notify_ready
) {
1535 static const char p
[] =
1537 "STATUS=Processing requests...";
1540 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1542 if (errno
== EAGAIN
)
1545 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1548 s
->sent_notify_ready
= true;
1549 log_debug("Sent READY=1 notification.");
1551 } else if (s
->send_watchdog
) {
1553 static const char p
[] =
1558 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1560 if (errno
== EAGAIN
)
1563 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1566 s
->send_watchdog
= false;
1567 log_debug("Sent WATCHDOG=1 notification.");
1569 } else if (s
->stdout_streams_notify_queue
)
1570 /* Dispatch one stream notification event */
1571 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1573 /* Leave us enabled if there's still more to to do. */
1574 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1577 /* There was nothing to do anymore, let's turn ourselves off. */
1578 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1580 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1585 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1586 Server
*s
= userdata
;
1591 s
->send_watchdog
= true;
1593 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1595 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1597 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1599 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1601 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1603 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1608 static int server_connect_notify(Server
*s
) {
1609 union sockaddr_union sa
= {
1610 .un
.sun_family
= AF_UNIX
,
1616 assert(s
->notify_fd
< 0);
1617 assert(!s
->notify_event_source
);
1620 So here's the problem: we'd like to send notification
1621 messages to PID 1, but we cannot do that via sd_notify(),
1622 since that's synchronous, and we might end up blocking on
1623 it. Specifically: given that PID 1 might block on
1624 dbus-daemon during IPC, and dbus-daemon is logging to us,
1625 and might hence block on us, we might end up in a deadlock
1626 if we block on sending PID 1 notification messages -- by
1627 generating a full blocking circle. To avoid this, let's
1628 create a non-blocking socket, and connect it to the
1629 notification socket, and then wait for POLLOUT before we
1630 send anything. This should efficiently avoid any deadlocks,
1631 as we'll never block on PID 1, hence PID 1 can safely block
1632 on dbus-daemon which can safely block on us again.
1634 Don't think that this issue is real? It is, see:
1635 https://github.com/systemd/systemd/issues/1505
1638 e
= getenv("NOTIFY_SOCKET");
1642 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1643 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1647 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1648 log_error("NOTIFY_SOCKET path too long: %s", e
);
1652 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1653 if (s
->notify_fd
< 0)
1654 return log_error_errno(errno
, "Failed to create notify socket: %m");
1656 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1658 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1659 if (sa
.un
.sun_path
[0] == '@')
1660 sa
.un
.sun_path
[0] = 0;
1662 r
= connect(s
->notify_fd
, &sa
.sa
, offsetof(struct sockaddr_un
, sun_path
) + strlen(e
));
1664 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1666 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1668 return log_error_errno(r
, "Failed to watch notification socket: %m");
1670 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1671 s
->send_watchdog
= true;
1673 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1675 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1678 /* This should fire pretty soon, which we'll use to send the
1684 int server_init(Server
*s
) {
1685 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1692 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1696 s
->watchdog_usec
= USEC_INFINITY
;
1698 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1699 s
->sync_scheduled
= false;
1701 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1702 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1704 s
->forward_to_wall
= true;
1706 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1708 s
->max_level_store
= LOG_DEBUG
;
1709 s
->max_level_syslog
= LOG_DEBUG
;
1710 s
->max_level_kmsg
= LOG_NOTICE
;
1711 s
->max_level_console
= LOG_INFO
;
1712 s
->max_level_wall
= LOG_EMERG
;
1714 journal_reset_metrics(&s
->system_metrics
);
1715 journal_reset_metrics(&s
->runtime_metrics
);
1717 server_parse_config_file(s
);
1718 server_parse_proc_cmdline(s
);
1720 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1721 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1722 s
->rate_limit_interval
, s
->rate_limit_burst
);
1723 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1726 (void) mkdir_p("/run/systemd/journal", 0755);
1728 s
->user_journals
= ordered_hashmap_new(NULL
);
1729 if (!s
->user_journals
)
1732 s
->mmap
= mmap_cache_new();
1736 r
= sd_event_default(&s
->event
);
1738 return log_error_errno(r
, "Failed to create event loop: %m");
1740 n
= sd_listen_fds(true);
1742 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1744 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1746 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1748 if (s
->native_fd
>= 0) {
1749 log_error("Too many native sockets passed.");
1755 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1757 if (s
->stdout_fd
>= 0) {
1758 log_error("Too many stdout sockets passed.");
1764 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1765 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1767 if (s
->syslog_fd
>= 0) {
1768 log_error("Too many /dev/log sockets passed.");
1774 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1776 if (s
->audit_fd
>= 0) {
1777 log_error("Too many audit sockets passed.");
1791 r
= fdset_put(fds
, fd
);
1797 /* Try to restore streams, but don't bother if this fails */
1798 (void) server_restore_streams(s
, fds
);
1800 if (fdset_size(fds
) > 0) {
1801 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1802 fds
= fdset_free(fds
);
1805 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1807 /* always open stdout, syslog, native, and kmsg sockets */
1809 /* systemd-journald.socket: /run/systemd/journal/stdout */
1810 r
= server_open_stdout_socket(s
);
1814 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1815 r
= server_open_syslog_socket(s
);
1819 /* systemd-journald.socket: /run/systemd/journal/socket */
1820 r
= server_open_native_socket(s
);
1825 r
= server_open_dev_kmsg(s
);
1829 /* Unless we got *some* sockets and not audit, open audit socket */
1830 if (s
->audit_fd
>= 0 || no_sockets
) {
1831 r
= server_open_audit(s
);
1836 r
= server_open_kernel_seqnum(s
);
1840 r
= server_open_hostname(s
);
1844 r
= setup_signals(s
);
1848 s
->udev
= udev_new();
1852 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1856 r
= cg_get_root_path(&s
->cgroup_root
);
1860 server_cache_hostname(s
);
1861 server_cache_boot_id(s
);
1862 server_cache_machine_id(s
);
1864 (void) server_connect_notify(s
);
1866 return system_journal_open(s
, false);
1869 void server_maybe_append_tags(Server
*s
) {
1875 n
= now(CLOCK_REALTIME
);
1877 if (s
->system_journal
)
1878 journal_file_maybe_append_tag(s
->system_journal
, n
);
1880 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1881 journal_file_maybe_append_tag(f
, n
);
1885 void server_done(Server
*s
) {
1889 while (s
->stdout_streams
)
1890 stdout_stream_free(s
->stdout_streams
);
1892 if (s
->system_journal
)
1893 journal_file_close(s
->system_journal
);
1895 if (s
->runtime_journal
)
1896 journal_file_close(s
->runtime_journal
);
1898 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1899 journal_file_close(f
);
1901 ordered_hashmap_free(s
->user_journals
);
1903 sd_event_source_unref(s
->syslog_event_source
);
1904 sd_event_source_unref(s
->native_event_source
);
1905 sd_event_source_unref(s
->stdout_event_source
);
1906 sd_event_source_unref(s
->dev_kmsg_event_source
);
1907 sd_event_source_unref(s
->audit_event_source
);
1908 sd_event_source_unref(s
->sync_event_source
);
1909 sd_event_source_unref(s
->sigusr1_event_source
);
1910 sd_event_source_unref(s
->sigusr2_event_source
);
1911 sd_event_source_unref(s
->sigterm_event_source
);
1912 sd_event_source_unref(s
->sigint_event_source
);
1913 sd_event_source_unref(s
->sigrtmin1_event_source
);
1914 sd_event_source_unref(s
->hostname_event_source
);
1915 sd_event_source_unref(s
->notify_event_source
);
1916 sd_event_source_unref(s
->watchdog_event_source
);
1917 sd_event_unref(s
->event
);
1919 safe_close(s
->syslog_fd
);
1920 safe_close(s
->native_fd
);
1921 safe_close(s
->stdout_fd
);
1922 safe_close(s
->dev_kmsg_fd
);
1923 safe_close(s
->audit_fd
);
1924 safe_close(s
->hostname_fd
);
1925 safe_close(s
->notify_fd
);
1928 journal_rate_limit_free(s
->rate_limit
);
1930 if (s
->kernel_seqnum
)
1931 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1935 free(s
->cgroup_root
);
1936 free(s
->hostname_field
);
1939 mmap_cache_unref(s
->mmap
);
1941 udev_unref(s
->udev
);
1944 static const char* const storage_table
[_STORAGE_MAX
] = {
1945 [STORAGE_AUTO
] = "auto",
1946 [STORAGE_VOLATILE
] = "volatile",
1947 [STORAGE_PERSISTENT
] = "persistent",
1948 [STORAGE_NONE
] = "none"
1951 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1952 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
1954 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1955 [SPLIT_LOGIN
] = "login",
1956 [SPLIT_UID
] = "uid",
1957 [SPLIT_NONE
] = "none",
1960 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1961 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");