2 This file is part of systemd.
4 Copyright 2011 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <selinux/selinux.h>
23 #include <sys/ioctl.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "formats-util.h"
46 #include "hostname-util.h"
48 #include "journal-authenticate.h"
49 #include "journal-file.h"
50 #include "journal-internal.h"
51 #include "journal-vacuum.h"
52 #include "journald-audit.h"
53 #include "journald-kmsg.h"
54 #include "journald-native.h"
55 #include "journald-rate-limit.h"
56 #include "journald-server.h"
57 #include "journald-stream.h"
58 #include "journald-syslog.h"
61 #include "parse-util.h"
62 #include "proc-cmdline.h"
63 #include "process-util.h"
65 #include "selinux-util.h"
66 #include "signal-util.h"
67 #include "socket-util.h"
68 #include "stdio-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
71 #include "user-util.h"
74 #define USER_JOURNALS_MAX 1024
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85 /* The period to insert between posting changes for coalescing */
86 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88 static int determine_space_for(
90 JournalMetrics
*metrics
,
98 uint64_t sum
= 0, ss_avail
, avail
;
99 _cleanup_closedir_
DIR *d
= NULL
;
110 ts
= now(CLOCK_MONOTONIC
);
112 if (!verbose
&& s
->cached_space_timestamp
+ RECHECK_SPACE_USEC
> ts
) {
115 *available
= s
->cached_space_available
;
117 *limit
= s
->cached_space_limit
;
122 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
125 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
, errno
, "Failed to open %s: %m", p
);
127 if (fstatvfs(dirfd(d
), &ss
) < 0)
128 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", p
);
130 FOREACH_DIRENT_ALL(de
, d
, break) {
133 if (!endswith(de
->d_name
, ".journal") &&
134 !endswith(de
->d_name
, ".journal~"))
137 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
138 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", p
, de
->d_name
);
142 if (!S_ISREG(st
.st_mode
))
145 sum
+= (uint64_t) st
.st_blocks
* 512UL;
148 /* If requested, then let's bump the min_use limit to the
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
156 metrics
->min_use
= MAX(metrics
->min_use
, sum
);
158 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
159 avail
= LESS_BY(ss_avail
, metrics
->keep_free
);
161 s
->cached_space_limit
= MIN(MAX(sum
+ avail
, metrics
->min_use
), metrics
->max_use
);
162 s
->cached_space_available
= LESS_BY(s
->cached_space_limit
, sum
);
163 s
->cached_space_timestamp
= ts
;
166 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
167 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
168 format_bytes(fb1
, sizeof(fb1
), sum
);
169 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
170 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
171 format_bytes(fb4
, sizeof(fb4
), ss_avail
);
172 format_bytes(fb5
, sizeof(fb5
), s
->cached_space_limit
);
173 format_bytes(fb6
, sizeof(fb6
), s
->cached_space_available
);
175 server_driver_message(s
, SD_MESSAGE_JOURNAL_USAGE
,
176 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
177 name
, path
, fb1
, fb5
, fb6
),
178 "JOURNAL_NAME=%s", name
,
179 "JOURNAL_PATH=%s", path
,
180 "CURRENT_USE=%"PRIu64
, sum
,
181 "CURRENT_USE_PRETTY=%s", fb1
,
182 "MAX_USE=%"PRIu64
, metrics
->max_use
,
183 "MAX_USE_PRETTY=%s", fb2
,
184 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
185 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
186 "DISK_AVAILABLE=%"PRIu64
, ss_avail
,
187 "DISK_AVAILABLE_PRETTY=%s", fb4
,
188 "LIMIT=%"PRIu64
, s
->cached_space_limit
,
189 "LIMIT_PRETTY=%s", fb5
,
190 "AVAILABLE=%"PRIu64
, s
->cached_space_available
,
191 "AVAILABLE_PRETTY=%s", fb6
,
196 *available
= s
->cached_space_available
;
198 *limit
= s
->cached_space_limit
;
203 static int determine_space(Server
*s
, bool verbose
, bool patch_min_use
, uint64_t *available
, uint64_t *limit
) {
204 JournalMetrics
*metrics
;
205 const char *path
, *name
;
209 if (s
->system_journal
) {
210 path
= "/var/log/journal/";
211 metrics
= &s
->system_metrics
;
212 name
= "System journal";
214 path
= "/run/log/journal/";
215 metrics
= &s
->runtime_metrics
;
216 name
= "Runtime journal";
219 return determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, available
, limit
);
222 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
229 if (uid
<= SYSTEM_UID_MAX
)
232 r
= add_acls_for_user(f
->fd
, uid
);
234 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
238 static int open_journal(
244 JournalMetrics
*metrics
,
254 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
256 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
260 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
262 (void) journal_file_close(f
);
270 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
271 _cleanup_free_
char *p
= NULL
;
278 /* We split up user logs only on /var, not on /run. If the
279 * runtime file is open, we write to it exclusively, in order
280 * to guarantee proper order as soon as we flush /run to
281 * /var and close the runtime file. */
283 if (s
->runtime_journal
)
284 return s
->runtime_journal
;
286 if (uid
<= SYSTEM_UID_MAX
)
287 return s
->system_journal
;
289 r
= sd_id128_get_machine(&machine
);
291 return s
->system_journal
;
293 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
297 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
298 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
299 return s
->system_journal
;
301 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
302 /* Too many open? Then let's close one */
303 f
= ordered_hashmap_steal_first(s
->user_journals
);
305 (void) journal_file_close(f
);
308 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &f
);
310 return s
->system_journal
;
312 server_add_acls(f
, uid
);
314 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
316 (void) journal_file_close(f
);
317 return s
->system_journal
;
323 static int do_rotate(
336 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
339 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
341 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
343 server_add_acls(*f
, uid
);
348 void server_rotate(Server
*s
) {
354 log_debug("Rotating...");
356 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
357 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
359 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
360 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
362 ordered_hashmap_replace(s
->user_journals
, k
, f
);
364 /* Old file has been closed and deallocated */
365 ordered_hashmap_remove(s
->user_journals
, k
);
368 /* Perform any deferred closes which aren't still offlining. */
369 SET_FOREACH(f
, s
->deferred_closes
, i
)
370 if (!journal_file_is_offlining(f
)) {
371 (void) set_remove(s
->deferred_closes
, f
);
372 (void) journal_file_close(f
);
376 void server_sync(Server
*s
) {
381 if (s
->system_journal
) {
382 r
= journal_file_set_offline(s
->system_journal
, false);
384 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
387 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
388 r
= journal_file_set_offline(f
, false);
390 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
393 if (s
->sync_event_source
) {
394 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
396 log_error_errno(r
, "Failed to disable sync timer source: %m");
399 s
->sync_scheduled
= false;
402 static void do_vacuum(
405 JournalMetrics
*metrics
,
409 bool patch_min_use
) {
423 p
= strjoina(path
, SERVER_MACHINE_ID(s
));
425 limit
= metrics
->max_use
;
426 (void) determine_space_for(s
, metrics
, path
, name
, verbose
, patch_min_use
, NULL
, &limit
);
428 r
= journal_directory_vacuum(p
, limit
, metrics
->n_max_files
, s
->max_retention_usec
, &s
->oldest_file_usec
, verbose
);
429 if (r
< 0 && r
!= -ENOENT
)
430 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", p
);
433 int server_vacuum(Server
*s
, bool verbose
, bool patch_min_use
) {
436 log_debug("Vacuuming...");
438 s
->oldest_file_usec
= 0;
440 do_vacuum(s
, s
->system_journal
, &s
->system_metrics
, "/var/log/journal/", "System journal", verbose
, patch_min_use
);
441 do_vacuum(s
, s
->runtime_journal
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", verbose
, patch_min_use
);
443 s
->cached_space_limit
= 0;
444 s
->cached_space_available
= 0;
445 s
->cached_space_timestamp
= 0;
450 static void server_cache_machine_id(Server
*s
) {
456 r
= sd_id128_get_machine(&id
);
460 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
463 static void server_cache_boot_id(Server
*s
) {
469 r
= sd_id128_get_boot(&id
);
473 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
476 static void server_cache_hostname(Server
*s
) {
477 _cleanup_free_
char *t
= NULL
;
482 t
= gethostname_malloc();
486 x
= strappend("_HOSTNAME=", t
);
490 free(s
->hostname_field
);
491 s
->hostname_field
= x
;
494 static bool shall_try_append_again(JournalFile
*f
, int r
) {
496 case -E2BIG
: /* Hit configured limit */
497 case -EFBIG
: /* Hit fs limit */
498 case -EDQUOT
: /* Quota limit hit */
499 case -ENOSPC
: /* Disk full */
500 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
502 case -EIO
: /* I/O error of some kind (mmap) */
503 log_warning("%s: IO error, rotating.", f
->path
);
505 case -EHOSTDOWN
: /* Other machine */
506 log_info("%s: Journal file from other machine, rotating.", f
->path
);
508 case -EBUSY
: /* Unclean shutdown */
509 log_info("%s: Unclean shutdown, rotating.", f
->path
);
511 case -EPROTONOSUPPORT
: /* Unsupported feature */
512 log_info("%s: Unsupported feature, rotating.", f
->path
);
514 case -EBADMSG
: /* Corrupted */
515 case -ENODATA
: /* Truncated */
516 case -ESHUTDOWN
: /* Already archived */
517 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
519 case -EIDRM
: /* Journal file has been deleted */
520 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
527 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
529 bool vacuumed
= false;
536 f
= find_journal(s
, uid
);
540 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
541 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
543 server_vacuum(s
, false, false);
546 f
= find_journal(s
, uid
);
551 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
553 server_schedule_sync(s
, priority
);
557 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
558 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
563 server_vacuum(s
, false, false);
565 f
= find_journal(s
, uid
);
569 log_debug("Retrying write.");
570 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
572 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
574 server_schedule_sync(s
, priority
);
577 static void dispatch_message_real(
579 struct iovec
*iovec
, unsigned n
, unsigned m
,
580 const struct ucred
*ucred
,
581 const struct timeval
*tv
,
582 const char *label
, size_t label_len
,
587 char pid
[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t
)],
588 uid
[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t
)],
589 gid
[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t
)],
590 owner_uid
[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)],
591 source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)],
592 o_uid
[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t
)],
593 o_gid
[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t
)],
594 o_owner_uid
[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t
)];
600 uid_t realuid
= 0, owner
= 0, journal_uid
;
601 bool owner_valid
= false;
603 char audit_session
[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
604 audit_loginuid
[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)],
605 o_audit_session
[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
606 o_audit_loginuid
[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t
)];
615 assert(n
+ N_IOVEC_META_FIELDS
+ (object_pid
? N_IOVEC_OBJECT_FIELDS
: 0) <= m
);
618 realuid
= ucred
->uid
;
620 sprintf(pid
, "_PID="PID_FMT
, ucred
->pid
);
621 IOVEC_SET_STRING(iovec
[n
++], pid
);
623 sprintf(uid
, "_UID="UID_FMT
, ucred
->uid
);
624 IOVEC_SET_STRING(iovec
[n
++], uid
);
626 sprintf(gid
, "_GID="GID_FMT
, ucred
->gid
);
627 IOVEC_SET_STRING(iovec
[n
++], gid
);
629 r
= get_process_comm(ucred
->pid
, &t
);
631 x
= strjoina("_COMM=", t
);
633 IOVEC_SET_STRING(iovec
[n
++], x
);
636 r
= get_process_exe(ucred
->pid
, &t
);
638 x
= strjoina("_EXE=", t
);
640 IOVEC_SET_STRING(iovec
[n
++], x
);
643 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
645 x
= strjoina("_CMDLINE=", t
);
647 IOVEC_SET_STRING(iovec
[n
++], x
);
650 r
= get_process_capeff(ucred
->pid
, &t
);
652 x
= strjoina("_CAP_EFFECTIVE=", t
);
654 IOVEC_SET_STRING(iovec
[n
++], x
);
658 r
= audit_session_from_pid(ucred
->pid
, &audit
);
660 sprintf(audit_session
, "_AUDIT_SESSION=%"PRIu32
, audit
);
661 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
664 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
666 sprintf(audit_loginuid
, "_AUDIT_LOGINUID="UID_FMT
, loginuid
);
667 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
671 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &c
);
673 char *session
= NULL
;
675 x
= strjoina("_SYSTEMD_CGROUP=", c
);
676 IOVEC_SET_STRING(iovec
[n
++], x
);
678 r
= cg_path_get_session(c
, &t
);
680 session
= strjoina("_SYSTEMD_SESSION=", t
);
682 IOVEC_SET_STRING(iovec
[n
++], session
);
685 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
688 sprintf(owner_uid
, "_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
689 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
692 if (cg_path_get_unit(c
, &t
) >= 0) {
693 x
= strjoina("_SYSTEMD_UNIT=", t
);
695 IOVEC_SET_STRING(iovec
[n
++], x
);
696 } else if (unit_id
&& !session
) {
697 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
698 IOVEC_SET_STRING(iovec
[n
++], x
);
701 if (cg_path_get_user_unit(c
, &t
) >= 0) {
702 x
= strjoina("_SYSTEMD_USER_UNIT=", t
);
704 IOVEC_SET_STRING(iovec
[n
++], x
);
705 } else if (unit_id
&& session
) {
706 x
= strjoina("_SYSTEMD_USER_UNIT=", unit_id
);
707 IOVEC_SET_STRING(iovec
[n
++], x
);
710 if (cg_path_get_slice(c
, &t
) >= 0) {
711 x
= strjoina("_SYSTEMD_SLICE=", t
);
713 IOVEC_SET_STRING(iovec
[n
++], x
);
717 } else if (unit_id
) {
718 x
= strjoina("_SYSTEMD_UNIT=", unit_id
);
719 IOVEC_SET_STRING(iovec
[n
++], x
);
723 if (mac_selinux_have()) {
725 x
= alloca(strlen("_SELINUX_CONTEXT=") + label_len
+ 1);
727 *((char*) mempcpy(stpcpy(x
, "_SELINUX_CONTEXT="), label
, label_len
)) = 0;
728 IOVEC_SET_STRING(iovec
[n
++], x
);
732 if (getpidcon(ucred
->pid
, &con
) >= 0) {
733 x
= strjoina("_SELINUX_CONTEXT=", con
);
736 IOVEC_SET_STRING(iovec
[n
++], x
);
745 r
= get_process_uid(object_pid
, &object_uid
);
747 sprintf(o_uid
, "OBJECT_UID="UID_FMT
, object_uid
);
748 IOVEC_SET_STRING(iovec
[n
++], o_uid
);
751 r
= get_process_gid(object_pid
, &object_gid
);
753 sprintf(o_gid
, "OBJECT_GID="GID_FMT
, object_gid
);
754 IOVEC_SET_STRING(iovec
[n
++], o_gid
);
757 r
= get_process_comm(object_pid
, &t
);
759 x
= strjoina("OBJECT_COMM=", t
);
761 IOVEC_SET_STRING(iovec
[n
++], x
);
764 r
= get_process_exe(object_pid
, &t
);
766 x
= strjoina("OBJECT_EXE=", t
);
768 IOVEC_SET_STRING(iovec
[n
++], x
);
771 r
= get_process_cmdline(object_pid
, 0, false, &t
);
773 x
= strjoina("OBJECT_CMDLINE=", t
);
775 IOVEC_SET_STRING(iovec
[n
++], x
);
779 r
= audit_session_from_pid(object_pid
, &audit
);
781 sprintf(o_audit_session
, "OBJECT_AUDIT_SESSION=%"PRIu32
, audit
);
782 IOVEC_SET_STRING(iovec
[n
++], o_audit_session
);
785 r
= audit_loginuid_from_pid(object_pid
, &loginuid
);
787 sprintf(o_audit_loginuid
, "OBJECT_AUDIT_LOGINUID="UID_FMT
, loginuid
);
788 IOVEC_SET_STRING(iovec
[n
++], o_audit_loginuid
);
792 r
= cg_pid_get_path_shifted(object_pid
, s
->cgroup_root
, &c
);
794 x
= strjoina("OBJECT_SYSTEMD_CGROUP=", c
);
795 IOVEC_SET_STRING(iovec
[n
++], x
);
797 r
= cg_path_get_session(c
, &t
);
799 x
= strjoina("OBJECT_SYSTEMD_SESSION=", t
);
801 IOVEC_SET_STRING(iovec
[n
++], x
);
804 if (cg_path_get_owner_uid(c
, &owner
) >= 0) {
805 sprintf(o_owner_uid
, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT
, owner
);
806 IOVEC_SET_STRING(iovec
[n
++], o_owner_uid
);
809 if (cg_path_get_unit(c
, &t
) >= 0) {
810 x
= strjoina("OBJECT_SYSTEMD_UNIT=", t
);
812 IOVEC_SET_STRING(iovec
[n
++], x
);
815 if (cg_path_get_user_unit(c
, &t
) >= 0) {
816 x
= strjoina("OBJECT_SYSTEMD_USER_UNIT=", t
);
818 IOVEC_SET_STRING(iovec
[n
++], x
);
827 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv
));
828 IOVEC_SET_STRING(iovec
[n
++], source_time
);
831 /* Note that strictly speaking storing the boot id here is
832 * redundant since the entry includes this in-line
833 * anyway. However, we need this indexed, too. */
834 if (!isempty(s
->boot_id_field
))
835 IOVEC_SET_STRING(iovec
[n
++], s
->boot_id_field
);
837 if (!isempty(s
->machine_id_field
))
838 IOVEC_SET_STRING(iovec
[n
++], s
->machine_id_field
);
840 if (!isempty(s
->hostname_field
))
841 IOVEC_SET_STRING(iovec
[n
++], s
->hostname_field
);
845 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
846 /* Split up strictly by any UID */
847 journal_uid
= realuid
;
848 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
849 /* Split up by login UIDs. We do this only if the
850 * realuid is not root, in order not to accidentally
851 * leak privileged information to the user that is
852 * logged by a privileged process that is part of an
853 * unprivileged session. */
858 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
861 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
862 char mid
[11 + 32 + 1];
863 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
];
867 struct ucred ucred
= {};
872 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
873 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_FACILITY=3");
874 IOVEC_SET_STRING(iovec
[n
++], "SYSLOG_IDENTIFIER=systemd-journald");
876 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
877 assert_cc(6 == LOG_INFO
);
878 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
880 if (!sd_id128_is_null(message_id
)) {
881 snprintf(mid
, sizeof(mid
), LOG_MESSAGE_ID(message_id
));
882 IOVEC_SET_STRING(iovec
[n
++], mid
);
887 va_start(ap
, format
);
888 r
= log_format_iovec(iovec
, ELEMENTSOF(iovec
), &n
, false, 0, format
, ap
);
889 /* Error handling below */
892 ucred
.pid
= getpid();
893 ucred
.uid
= getuid();
894 ucred
.gid
= getgid();
897 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
900 free(iovec
[m
++].iov_base
);
903 /* We failed to format the message. Emit a warning instead. */
906 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
909 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=4");
910 IOVEC_SET_STRING(iovec
[n
++], buf
);
911 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
, LOG_INFO
, 0);
915 void server_dispatch_message(
917 struct iovec
*iovec
, unsigned n
, unsigned m
,
918 const struct ucred
*ucred
,
919 const struct timeval
*tv
,
920 const char *label
, size_t label_len
,
926 _cleanup_free_
char *path
= NULL
;
927 uint64_t available
= 0;
931 assert(iovec
|| n
== 0);
936 if (LOG_PRI(priority
) > s
->max_level_store
)
939 /* Stop early in case the information will not be stored
941 if (s
->storage
== STORAGE_NONE
)
947 r
= cg_pid_get_path_shifted(ucred
->pid
, s
->cgroup_root
, &path
);
951 /* example: /user/lennart/3/foobar
952 * /system/dbus.service/foobar
954 * So let's cut off everything past the third /, since that is
955 * where user directories start */
957 c
= strchr(path
, '/');
959 c
= strchr(c
+1, '/');
961 c
= strchr(c
+1, '/');
967 (void) determine_space(s
, false, false, &available
, NULL
);
968 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available
);
972 /* Write a suppression message if we suppressed something */
974 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
975 LOG_MESSAGE("Suppressed %u messages from %s", rl
- 1, path
),
979 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
, priority
, object_pid
);
983 static int system_journal_open(Server
*s
, bool flush_requested
) {
987 if (!s
->system_journal
&&
988 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
990 || access("/run/systemd/journal/flushed", F_OK
) >= 0)) {
992 /* If in auto mode: first try to create the machine
993 * path, but not the prefix.
995 * If in persistent mode: create /var/log/journal and
996 * the machine path */
998 if (s
->storage
== STORAGE_PERSISTENT
)
999 (void) mkdir_p("/var/log/journal/", 0755);
1001 fn
= strjoina("/var/log/journal/", SERVER_MACHINE_ID(s
));
1002 (void) mkdir(fn
, 0755);
1004 fn
= strjoina(fn
, "/system.journal");
1005 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_metrics
, &s
->system_journal
);
1007 server_add_acls(s
->system_journal
, 0);
1008 (void) determine_space_for(s
, &s
->system_metrics
, "/var/log/journal/", "System journal", true, true, NULL
, NULL
);
1010 if (r
!= -ENOENT
&& r
!= -EROFS
)
1011 log_warning_errno(r
, "Failed to open system journal: %m");
1017 if (!s
->runtime_journal
&&
1018 (s
->storage
!= STORAGE_NONE
)) {
1020 fn
= strjoina("/run/log/journal/", SERVER_MACHINE_ID(s
), "/system.journal");
1022 if (s
->system_journal
) {
1024 /* Try to open the runtime journal, but only
1025 * if it already exists, so that we can flush
1026 * it into the system journal */
1028 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1031 log_warning_errno(r
, "Failed to open runtime journal: %m");
1038 /* OK, we really need the runtime journal, so create
1039 * it if necessary. */
1041 (void) mkdir("/run/log", 0755);
1042 (void) mkdir("/run/log/journal", 0755);
1043 (void) mkdir_parents(fn
, 0750);
1045 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_metrics
, &s
->runtime_journal
);
1047 return log_error_errno(r
, "Failed to open runtime journal: %m");
1050 if (s
->runtime_journal
) {
1051 server_add_acls(s
->runtime_journal
, 0);
1052 (void) determine_space_for(s
, &s
->runtime_metrics
, "/run/log/journal/", "Runtime journal", true, true, NULL
, NULL
);
1059 int server_flush_to_var(Server
*s
) {
1061 sd_journal
*j
= NULL
;
1062 char ts
[FORMAT_TIMESPAN_MAX
];
1069 if (s
->storage
!= STORAGE_AUTO
&&
1070 s
->storage
!= STORAGE_PERSISTENT
)
1073 if (!s
->runtime_journal
)
1076 (void) system_journal_open(s
, true);
1078 if (!s
->system_journal
)
1081 log_debug("Flushing to /var...");
1083 start
= now(CLOCK_MONOTONIC
);
1085 r
= sd_id128_get_machine(&machine
);
1089 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1091 return log_error_errno(r
, "Failed to read runtime journal: %m");
1093 sd_journal_set_data_threshold(j
, 0);
1095 SD_JOURNAL_FOREACH(j
) {
1099 f
= j
->current_file
;
1100 assert(f
&& f
->current_offset
> 0);
1104 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1106 log_error_errno(r
, "Can't read entry: %m");
1110 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1114 if (!shall_try_append_again(s
->system_journal
, r
)) {
1115 log_error_errno(r
, "Can't write entry: %m");
1120 server_vacuum(s
, false, false);
1122 if (!s
->system_journal
) {
1123 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1128 log_debug("Retrying write.");
1129 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1131 log_error_errno(r
, "Can't write entry: %m");
1139 journal_file_post_change(s
->system_journal
);
1141 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1144 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1146 sd_journal_close(j
);
1148 server_driver_message(s
, SD_ID128_NULL
,
1149 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1150 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1157 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1158 Server
*s
= userdata
;
1159 struct ucred
*ucred
= NULL
;
1160 struct timeval
*tv
= NULL
;
1161 struct cmsghdr
*cmsg
;
1163 size_t label_len
= 0, m
;
1166 int *fds
= NULL
, v
= 0;
1170 struct cmsghdr cmsghdr
;
1172 /* We use NAME_MAX space for the SELinux label
1173 * here. The kernel currently enforces no
1174 * limit, but according to suggestions from
1175 * the SELinux people this will change and it
1176 * will probably be identical to NAME_MAX. For
1177 * now we use that, but this should be updated
1178 * one day when the final limit is known. */
1179 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1180 CMSG_SPACE(sizeof(struct timeval
)) +
1181 CMSG_SPACE(sizeof(int)) + /* fd */
1182 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1185 union sockaddr_union sa
= {};
1187 struct msghdr msghdr
= {
1190 .msg_control
= &control
,
1191 .msg_controllen
= sizeof(control
),
1193 .msg_namelen
= sizeof(sa
),
1197 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1199 if (revents
!= EPOLLIN
) {
1200 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1204 /* Try to get the right size, if we can. (Not all
1205 * sockets support SIOCINQ, hence we just try, but
1206 * don't rely on it.) */
1207 (void) ioctl(fd
, SIOCINQ
, &v
);
1209 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1210 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1212 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1214 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1217 iovec
.iov_base
= s
->buffer
;
1218 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1220 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1222 if (errno
== EINTR
|| errno
== EAGAIN
)
1225 return log_error_errno(errno
, "recvmsg() failed: %m");
1228 CMSG_FOREACH(cmsg
, &msghdr
) {
1230 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1231 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1232 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1233 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1234 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1235 cmsg
->cmsg_type
== SCM_SECURITY
) {
1236 label
= (char*) CMSG_DATA(cmsg
);
1237 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1238 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1239 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1240 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1241 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1242 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1243 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1244 fds
= (int*) CMSG_DATA(cmsg
);
1245 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1249 /* And a trailing NUL, just in case */
1252 if (fd
== s
->syslog_fd
) {
1253 if (n
> 0 && n_fds
== 0)
1254 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1256 log_warning("Got file descriptors via syslog socket. Ignoring.");
1258 } else if (fd
== s
->native_fd
) {
1259 if (n
> 0 && n_fds
== 0)
1260 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1261 else if (n
== 0 && n_fds
== 1)
1262 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1264 log_warning("Got too many file descriptors via native socket. Ignoring.");
1267 assert(fd
== s
->audit_fd
);
1269 if (n
> 0 && n_fds
== 0)
1270 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1272 log_warning("Got file descriptors via audit socket. Ignoring.");
1275 close_many(fds
, n_fds
);
1279 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1280 Server
*s
= userdata
;
1285 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1287 server_flush_to_var(s
);
1289 server_vacuum(s
, false, false);
1291 r
= touch("/run/systemd/journal/flushed");
1293 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1298 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1299 Server
*s
= userdata
;
1304 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1306 server_vacuum(s
, true, true);
1308 /* Let clients know when the most recent rotation happened. */
1309 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1311 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1316 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1317 Server
*s
= userdata
;
1321 log_received_signal(LOG_INFO
, si
);
1323 sd_event_exit(s
->event
, 0);
1327 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1328 Server
*s
= userdata
;
1333 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1337 /* Let clients know when the most recent sync happened. */
1338 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1340 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1345 static int setup_signals(Server
*s
) {
1350 assert(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1352 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1356 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1360 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1364 /* Let's process SIGTERM late, so that we flush all queued
1365 * messages to disk before we exit */
1366 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1370 /* When journald is invoked on the terminal (when debugging),
1371 * it's useful if C-c is handled equivalently to SIGTERM. */
1372 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1376 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1380 /* SIGRTMIN+1 causes an immediate sync. We process this very
1381 * late, so that everything else queued at this point is
1382 * really written to disk. Clients can watch
1383 * /run/systemd/journal/synced with inotify until its mtime
1384 * changes to see when a sync happened. */
1385 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1389 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Applies journald switches found on the kernel command line.
 * The command line is read with proc_cmdline() and split into words with
 * extract_first_word(). These boolean options are recognized and stored in
 * the Server:
 *   systemd.journald.forward_to_syslog=   -> forward_to_syslog
 *   systemd.journald.forward_to_kmsg=     -> forward_to_kmsg
 *   systemd.journald.forward_to_console=  -> forward_to_console
 *   systemd.journald.forward_to_wall=     -> forward_to_wall
 * The constants 35, 33, 36 and 33 added to the word are the lengths of the
 * respective prefixes, i.e. they skip ahead to the value after the equals
 * sign before it is handed to parse_boolean().
 * Unparsable values and other words starting with systemd.journald are
 * reported with a warning. A failure to read /proc/cmdline is only warned
 * about, while a word extraction failure is logged and returned. */
1396 static int server_parse_proc_cmdline(Server
*s
) {
1397 _cleanup_free_
char *line
= NULL
;
1401 r
= proc_cmdline(&line
);
1403 log_warning_errno(r
, "Failed to read /proc/cmdline, ignoring: %m");
1409 _cleanup_free_
char *word
= NULL
;
1411 r
= extract_first_word(&p
, &word
, NULL
, 0);
1413 return log_error_errno(r
, "Failed to parse journald syntax \"%s\": %m", line
);
1418 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1419 r
= parse_boolean(word
+ 35);
1421 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1423 s
->forward_to_syslog
= r
;
1424 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1425 r
= parse_boolean(word
+ 33);
1427 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1429 s
->forward_to_kmsg
= r
;
1430 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1431 r
= parse_boolean(word
+ 36);
1433 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1435 s
->forward_to_console
= r
;
1436 } else if (startswith(word
, "systemd.journald.forward_to_wall=")) {
1437 r
= parse_boolean(word
+ 33);
1439 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word
+ 33);
1441 s
->forward_to_wall
= r
;
1442 } else if (startswith(word
, "systemd.journald"))
1443 log_warning("Invalid systemd.journald parameter. Ignoring.");
1446 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration through config_parse_many(): the main
 * file PKGSYSCONFDIR/journald.conf plus the drop-in directories named by
 * CONF_PATHS_NULSTR for systemd/journald.conf.d, with settings resolved via
 * config_item_perf_lookup and the journald_gperf_lookup table.
 * Returns whatever config_parse_many() returns. */
1450 static int server_parse_config_file(Server
*s
) {
1453 return config_parse_many(PKGSYSCONFDIR
"/journald.conf",
1454 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1456 config_item_perf_lookup
, journald_gperf_lookup
,
/* Timer callback of the sync event source that server_schedule_sync()
 * installs; userdata is the Server the timer belongs to. */
1460 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1461 Server
*s
= userdata
;
/* Schedules a sync of the journal to disk for a message of the given
 * syslog priority.
 * Priorities of LOG_CRIT or more severe take the immediate path described
 * by the comment in the body. Otherwise the sync_scheduled flag is checked
 * first and, if sync_interval_usec is non-zero, the timer
 * sync_event_source is armed for the current CLOCK_MONOTONIC event loop
 * time plus sync_interval_usec. The timer is created on first use with
 * server_dispatch_sync() as callback and SD_EVENT_PRIORITY_IMPORTANT as
 * priority, is enabled as SD_EVENT_ONESHOT, and sync_scheduled is set.
 * NOTE(review): the return statements are not among the statements shown
 * here; presumably the first failing event loop call is returned -
 * confirm. */
1469 int server_schedule_sync(Server
*s
, int priority
) {
1474 if (priority
<= LOG_CRIT
) {
1475 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1480 if (s
->sync_scheduled
)
1483 if (s
->sync_interval_usec
> 0) {
1486 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1490 when
+= s
->sync_interval_usec
;
1492 if (!s
->sync_event_source
) {
1493 r
= sd_event_add_time(
1495 &s
->sync_event_source
,
1498 server_dispatch_sync
, s
);
1502 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1504 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1508 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1513 s
->sync_scheduled
= true;
/* I/O callback for the hostname file descriptor (registered in
 * server_open_hostname()): refreshes the cached hostname of the Server
 * passed as userdata via server_cache_hostname(). */
1519 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1520 Server
*s
= userdata
;
1524 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and registers the file descriptor with
 * the event loop so that dispatch_hostname_change() is invoked for it.
 * Failing to open the file is logged and returned as an error. When the
 * registration fails in the situation described by the comment in the body
 * (older kernels), a warning is logged and the file descriptor is closed
 * again; other registration failures are logged and returned. Finally the
 * event source priority is set to SD_EVENT_PRIORITY_IMPORTANT-10, and a
 * failure to do so is logged and returned as well. */
1528 static int server_open_hostname(Server
*s
) {
1533 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1534 if (s
->hostname_fd
< 0)
1535 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1537 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1539 /* kernels prior to 3.2 don't support polling this file. Ignore
1542 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1543 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1547 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1550 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1552 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (watched for EPOLLOUT, see
 * server_connect_notify()).
 * Sends at most one message per invocation, in this order of preference:
 * the initial READY=1 status message, a WATCHDOG=1 message when
 * send_watchdog is set, or one queued stdout stream notification. Each
 * send() uses MSG_DONTWAIT so that it cannot block; EAGAIN is checked
 * separately from other errors, which are logged and returned.
 * The event source stays enabled while a watchdog message or a stream
 * notification is still pending and is switched to SD_EVENT_OFF
 * otherwise. */
1557 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1558 Server
*s
= userdata
;
1562 assert(s
->notify_event_source
== es
);
1563 assert(s
->notify_fd
== fd
);
1565 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1566 * message on it. Either it's the watchdog event, the initial
1567 * READY=1 event or an stdout stream event. If there's nothing
1568 * to write anymore, turn our event source off. The next time
1569 * there's something to send it will be turned on again. */
1571 if (!s
->sent_notify_ready
) {
1572 static const char p
[] =
1574 "STATUS=Processing requests...";
1577 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1579 if (errno
== EAGAIN
)
1582 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1585 s
->sent_notify_ready
= true;
1586 log_debug("Sent READY=1 notification.");
1588 } else if (s
->send_watchdog
) {
1590 static const char p
[] =
1595 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1597 if (errno
== EAGAIN
)
1600 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1603 s
->send_watchdog
= false;
1604 log_debug("Sent WATCHDOG=1 notification.");
1606 } else if (s
->stdout_streams_notify_queue
)
1607 /* Dispatch one stream notification event */
1608 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1610 /* Leave us enabled if there's still more to do. */
1611 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1614 /* There was nothing to do anymore, let's turn ourselves off. */
1615 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1617 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of the watchdog event source (added in
 * server_connect_notify()).
 * Marks a watchdog message as pending by setting send_watchdog, switches
 * the notify event source on so that dispatch_notify_event() gets a chance
 * to send it, and re-arms this timer for usec + watchdog_usec/2, where usec
 * is the time value passed to the callback.
 * A failure to enable the notify source is only warned about; failures to
 * re-arm or re-enable the watchdog timer are logged and returned. */
1622 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1623 Server
*s
= userdata
;
1628 s
->send_watchdog
= true;
1630 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1632 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1634 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1636 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1638 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1640 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking AF_UNIX datagram socket to the address given in
 * $NOTIFY_SOCKET and watches it for EPOLLOUT with dispatch_notify_event();
 * the long explanation in the body says why sd_notify() is not used.
 * The value of $NOTIFY_SOCKET must start with @ or /, must be longer than
 * one character and must not exceed the size of sun_path; otherwise an
 * error is logged. A leading @ is replaced by a NUL byte before
 * connecting. The send buffer of the socket is enlarged to
 * NOTIFY_SNDBUF_SIZE on a best-effort basis.
 * If sd_watchdog_enabled() reports an active watchdog, send_watchdog is
 * set and a CLOCK_MONOTONIC timer running dispatch_watchdog() is added,
 * first due after watchdog_usec/2 with an accuracy of watchdog_usec/4.
 * Failures of socket(), connect() and the event loop calls are logged and
 * returned.
 * NOTE(review): a value of exactly sizeof(sun_path) bytes passes the
 * length check, in which case strncpy() leaves sun_path without a
 * terminating NUL byte; presumably SOCKADDR_UN_LEN() copes with that -
 * confirm. */
1645 static int server_connect_notify(Server
*s
) {
1646 union sockaddr_union sa
= {
1647 .un
.sun_family
= AF_UNIX
,
1653 assert(s
->notify_fd
< 0);
1654 assert(!s
->notify_event_source
);
1657 So here's the problem: we'd like to send notification
1658 messages to PID 1, but we cannot do that via sd_notify(),
1659 since that's synchronous, and we might end up blocking on
1660 it. Specifically: given that PID 1 might block on
1661 dbus-daemon during IPC, and dbus-daemon is logging to us,
1662 and might hence block on us, we might end up in a deadlock
1663 if we block on sending PID 1 notification messages — by
1664 generating a full blocking circle. To avoid this, let's
1665 create a non-blocking socket, and connect it to the
1666 notification socket, and then wait for POLLOUT before we
1667 send anything. This should efficiently avoid any deadlocks,
1668 as we'll never block on PID 1, hence PID 1 can safely block
1669 on dbus-daemon which can safely block on us again.
1671 Don't think that this issue is real? It is, see:
1672 https://github.com/systemd/systemd/issues/1505
1675 e
= getenv("NOTIFY_SOCKET");
1679 if ((e
[0] != '@' && e
[0] != '/') || e
[1] == 0) {
1680 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1684 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1685 log_error("NOTIFY_SOCKET path too long: %s", e
);
1689 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1690 if (s
->notify_fd
< 0)
1691 return log_error_errno(errno
, "Failed to create notify socket: %m");
1693 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1695 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1696 if (sa
.un
.sun_path
[0] == '@')
1697 sa
.un
.sun_path
[0] = 0;
1699 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1701 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1703 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1705 return log_error_errno(r
, "Failed to watch notification socket: %m");
1707 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1708 s
->send_watchdog
= true;
1710 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1712 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1715 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server object.
 * Order of work as far as visible here:
 *  - all file descriptors are reset to -1 and the defaults are filled in
 *    (watchdog, sync interval, rate limit, forwarding to wall, maximum
 *    file age, maximum log levels per target, journal metrics);
 *  - the configuration file and then the kernel command line may override
 *    the defaults (their return values are not checked);
 *  - if exactly one of rate_limit_interval and rate_limit_burst is zero,
 *    both are forced to zero;
 *  - /run/systemd/journal is created, and the user journal map, the mmap
 *    cache, the deferred-close set and the default event loop are set up;
 *  - file descriptors received via sd_listen_fds() are classified as
 *    native, stdout, syslog or audit socket, with at most one of each
 *    kind accepted; stdout streams are restored on a best-effort basis and
 *    leftover unknown descriptors are closed with a warning;
 *  - the stdout, syslog and native sockets and /dev/kmsg are opened, the
 *    audit socket only if one was passed in or no sockets were passed at
 *    all;
 *  - kernel sequence number, hostname watch, signal handling, udev, the
 *    rate limiter and the cgroup root are set up, and hostname, boot ID
 *    and machine ID are cached;
 *  - the notification socket is connected on a best-effort basis.
 * Returns the result of system_journal_open(); errors of sd_event_default()
 * and sd_listen_fds() are logged and returned. */
1721 int server_init(Server
*s
) {
1722 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1729 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1733 s
->watchdog_usec
= USEC_INFINITY
;
1735 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1736 s
->sync_scheduled
= false;
1738 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1739 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1741 s
->forward_to_wall
= true;
1743 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1745 s
->max_level_store
= LOG_DEBUG
;
1746 s
->max_level_syslog
= LOG_DEBUG
;
1747 s
->max_level_kmsg
= LOG_NOTICE
;
1748 s
->max_level_console
= LOG_INFO
;
1749 s
->max_level_wall
= LOG_EMERG
;
1751 journal_reset_metrics(&s
->system_metrics
);
1752 journal_reset_metrics(&s
->runtime_metrics
);
1754 server_parse_config_file(s
);
1755 server_parse_proc_cmdline(s
);
1757 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1758 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1759 s
->rate_limit_interval
, s
->rate_limit_burst
);
1760 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1763 (void) mkdir_p("/run/systemd/journal", 0755);
1765 s
->user_journals
= ordered_hashmap_new(NULL
);
1766 if (!s
->user_journals
)
1769 s
->mmap
= mmap_cache_new();
1773 s
->deferred_closes
= set_new(NULL
);
1774 if (!s
->deferred_closes
)
1777 r
= sd_event_default(&s
->event
);
1779 return log_error_errno(r
, "Failed to create event loop: %m");
1781 n
= sd_listen_fds(true);
1783 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1785 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1787 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1789 if (s
->native_fd
>= 0) {
1790 log_error("Too many native sockets passed.");
1796 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1798 if (s
->stdout_fd
>= 0) {
1799 log_error("Too many stdout sockets passed.");
1805 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1806 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1808 if (s
->syslog_fd
>= 0) {
1809 log_error("Too many /dev/log sockets passed.");
1815 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1817 if (s
->audit_fd
>= 0) {
1818 log_error("Too many audit sockets passed.");
1832 r
= fdset_put(fds
, fd
);
1838 /* Try to restore streams, but don't bother if this fails */
1839 (void) server_restore_streams(s
, fds
);
1841 if (fdset_size(fds
) > 0) {
1842 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1843 fds
= fdset_free(fds
);
1846 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1848 /* always open stdout, syslog, native, and kmsg sockets */
1850 /* systemd-journald.socket: /run/systemd/journal/stdout */
1851 r
= server_open_stdout_socket(s
);
1855 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1856 r
= server_open_syslog_socket(s
);
1860 /* systemd-journald.socket: /run/systemd/journal/socket */
1861 r
= server_open_native_socket(s
);
1866 r
= server_open_dev_kmsg(s
);
1870 /* Unless we got *some* sockets and not audit, open audit socket */
1871 if (s
->audit_fd
>= 0 || no_sockets
) {
1872 r
= server_open_audit(s
);
1877 r
= server_open_kernel_seqnum(s
);
1881 r
= server_open_hostname(s
);
1885 r
= setup_signals(s
);
1889 s
->udev
= udev_new();
1893 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1897 r
= cg_get_root_path(&s
->cgroup_root
);
1901 server_cache_hostname(s
);
1902 server_cache_boot_id(s
);
1903 server_cache_machine_id(s
);
1905 (void) server_connect_notify(s
);
1907 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME
 * time on the system journal, if one is open, and on every journal file in
 * the user_journals map. */
1910 void server_maybe_append_tags(Server
*s
) {
1916 n
= now(CLOCK_REALTIME
);
1918 if (s
->system_journal
)
1919 journal_file_maybe_append_tag(s
->system_journal
, n
);
1921 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1922 journal_file_maybe_append_tag(f
, n
);
/* Releases the resources held by a Server:
 *  - closes the journal files queued in deferred_closes and frees the set;
 *  - frees all stdout streams;
 *  - closes the system journal, the runtime journal and every user
 *    journal, then frees the user journal map;
 *  - drops the references to all event sources and to the event loop;
 *  - closes all file descriptors with safe_close();
 *  - frees the rate limiter, unmaps the kernel_seqnum mapping of
 *    sizeof(uint64_t) bytes, frees cgroup_root and hostname_field, and
 *    drops the mmap cache and udev references. */
1926 void server_done(Server
*s
) {
1930 if (s
->deferred_closes
) {
1931 journal_file_close_set(s
->deferred_closes
);
1932 set_free(s
->deferred_closes
);
1935 while (s
->stdout_streams
)
1936 stdout_stream_free(s
->stdout_streams
);
1938 if (s
->system_journal
)
1939 (void) journal_file_close(s
->system_journal
);
1941 if (s
->runtime_journal
)
1942 (void) journal_file_close(s
->runtime_journal
);
1944 while ((f
= ordered_hashmap_steal_first(s
->user_journals
)))
1945 (void) journal_file_close(f
);
1947 ordered_hashmap_free(s
->user_journals
);
1949 sd_event_source_unref(s
->syslog_event_source
);
1950 sd_event_source_unref(s
->native_event_source
);
1951 sd_event_source_unref(s
->stdout_event_source
);
1952 sd_event_source_unref(s
->dev_kmsg_event_source
);
1953 sd_event_source_unref(s
->audit_event_source
);
1954 sd_event_source_unref(s
->sync_event_source
);
1955 sd_event_source_unref(s
->sigusr1_event_source
);
1956 sd_event_source_unref(s
->sigusr2_event_source
);
1957 sd_event_source_unref(s
->sigterm_event_source
);
1958 sd_event_source_unref(s
->sigint_event_source
);
1959 sd_event_source_unref(s
->sigrtmin1_event_source
);
1960 sd_event_source_unref(s
->hostname_event_source
);
1961 sd_event_source_unref(s
->notify_event_source
);
1962 sd_event_source_unref(s
->watchdog_event_source
);
1963 sd_event_unref(s
->event
);
1965 safe_close(s
->syslog_fd
);
1966 safe_close(s
->native_fd
);
1967 safe_close(s
->stdout_fd
);
1968 safe_close(s
->dev_kmsg_fd
);
1969 safe_close(s
->audit_fd
);
1970 safe_close(s
->hostname_fd
);
1971 safe_close(s
->notify_fd
);
1974 journal_rate_limit_free(s
->rate_limit
);
1976 if (s
->kernel_seqnum
)
1977 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1981 free(s
->cgroup_root
);
1982 free(s
->hostname_field
);
1985 mmap_cache_unref(s
->mmap
);
1987 udev_unref(s
->udev
);
1990 static const char* const storage_table
[_STORAGE_MAX
] = {
1991 [STORAGE_AUTO
] = "auto",
1992 [STORAGE_VOLATILE
] = "volatile",
1993 [STORAGE_PERSISTENT
] = "persistent",
1994 [STORAGE_NONE
] = "none"
1997 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1998 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
2000 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2001 [SPLIT_LOGIN
] = "login",
2002 [SPLIT_UID
] = "uid",
2003 [SPLIT_NONE
] = "none",
2006 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2007 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");