1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/signalfd.h>
23 #include <sys/ioctl.h>
24 #include <linux/sockios.h>
25 #include <sys/statvfs.h>
29 #include <systemd/sd-journal.h>
30 #include <systemd/sd-messages.h>
31 #include <systemd/sd-daemon.h>
34 #include <systemd/sd-login.h>
40 #include "journal-file.h"
41 #include "socket-util.h"
42 #include "cgroup-util.h"
46 #include "conf-parser.h"
47 #include "journal-internal.h"
48 #include "journal-vacuum.h"
49 #include "journal-authenticate.h"
50 #include "journald-server.h"
51 #include "journald-rate-limit.h"
52 #include "journald-kmsg.h"
53 #include "journald-syslog.h"
54 #include "journald-stream.h"
55 #include "journald-console.h"
56 #include "journald-native.h"
60 #include <acl/libacl.h>
65 #include <selinux/selinux.h>
/* Cap on the number of per-user journal files kept open at once;
 * find_journal() closes entries of the user_journals hashmap while its size
 * has reached this value. */
68 #define USER_JOURNALS_MAX 1024
/* Values server_init() assigns to rate_limit_interval and rate_limit_burst
 * before the configuration is parsed. */
70 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
71 #define DEFAULT_RATE_LIMIT_BURST 200
/* How long available_space() keeps handing out its cached result before it
 * recomputes. */
73 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* String names for the Storage enum, indexed by enum value. The two macro
 * invocations below are keyed on the "storage" prefix and the Storage type;
 * presumably they generate the string<->enum lookup helpers and the
 * config_parse_storage() parser -- confirm against the macro definitions. */
75 static const char* const storage_table
[] = {
76 [STORAGE_AUTO
] = "auto",
77 [STORAGE_VOLATILE
] = "volatile",
78 [STORAGE_PERSISTENT
] = "persistent",
79 [STORAGE_NONE
] = "none"
82 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
83 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names for the SplitMode enum, indexed by enum value. As with the
 * storage table, the macros below presumably generate the lookup helpers and
 * config_parse_split_mode() -- confirm against the macro definitions. */
85 static const char* const split_mode_table
[] = {
86 [SPLIT_NONE
] = "none",
88 [SPLIT_LOGIN
] = "login"
91 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
92 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Computes how many more bytes the journal may consume in its per-machine
 * directory, limited both by the configured size cap (max_use) and by the
 * free-space reserve of the file system (keep_free). The result is stored in
 * s->cached_available_space together with a monotonic timestamp and is reused
 * until RECHECK_AVAILABLE_SPACE_USEC has elapsed. */
94 static uint64_t available_space(Server
*s
) {
96 char _cleanup_free_
*p
= NULL
;
100 uint64_t sum
= 0, avail
= 0, ss_avail
= 0;
102 DIR _cleanup_closedir_
*d
= NULL
;
106 ts
= now(CLOCK_MONOTONIC
);
/* Serve the cached value while it is still fresh. */
108 if (s
->cached_available_space_timestamp
+ RECHECK_AVAILABLE_SPACE_USEC
> ts
)
109 return s
->cached_available_space
;
111 r
= sd_id128_get_machine(&machine
);
/* Directory prefix and metrics: /var with the system metrics when the system
 * journal is open, /run with the runtime metrics as the other choice. */
115 if (s
->system_journal
) {
116 f
= "/var/log/journal/";
117 m
= &s
->system_metrics
;
119 f
= "/run/log/journal/";
120 m
= &s
->runtime_metrics
;
/* The directory inspected is <prefix><machine-id>. */
125 p
= strappend(f
, sd_id128_to_string(machine
, ids
));
133 if (fstatvfs(dirfd(d
), &ss
) < 0)
139 union dirent_storage buf
;
141 r
= readdir_r(d
, &buf
.de
, &de
);
/* Only names ending in ".journal" or ".journal~" that are regular files
 * contribute to the usage sum. */
148 if (!endswith(de
->d_name
, ".journal") &&
149 !endswith(de
->d_name
, ".journal~"))
152 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
155 if (!S_ISREG(st
.st_mode
))
/* st_blocks is multiplied by 512 to obtain bytes actually allocated. */
158 sum
+= (uint64_t) st
.st_blocks
* 512UL;
/* Room left below the size cap, clamped at zero. */
161 avail
= sum
>= m
->max_use
? 0 : m
->max_use
- sum
;
/* Free bytes reported for the file system, minus the reserve, clamped at zero. */
163 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
165 ss_avail
= ss_avail
< m
->keep_free
? 0 : ss_avail
- m
->keep_free
;
167 if (ss_avail
< avail
)
/* Remember the result and when it was computed. */
170 s
->cached_available_space
= avail
;
171 s
->cached_available_space_timestamp
= ts
;
/* Looks up the gid of the "adm" group into s->file_gid. The lookup is done at
 * most once per Server: s->file_gid_valid is checked first and is set to true
 * afterwards, also when the lookup failed (a warning is logged in that case). */
176 static void server_read_file_gid(Server
*s
) {
177 const char *adm
= "adm";
182 if (s
->file_gid_valid
)
185 r
= get_group_creds(&adm
, &s
->file_gid
);
187 log_warning("Failed to resolve 'adm' group: %s", strerror(-r
));
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s
->file_gid_valid
= true;
/* Adjusts ownership, mode and ACL of an open journal file.
 *
 *   s    server; supplies the group id applied to the file
 *   f    journal file whose descriptor (f->fd) is modified
 *   uid  user that is given an ACL read entry on the file
 *
 * Every failure visible here is only logged with an "ignoring" warning. */
195 void server_fix_perms(Server
*s
, JournalFile
*f
, uid_t uid
) {
200 acl_permset_t permset
;
/* Make sure s->file_gid has been looked up. */
205 server_read_file_gid(s
);
/* Mode 0640, owner uid 0, group s->file_gid. */
207 r
= fchmod_and_fchown(f
->fd
, 0640, 0, s
->file_gid
);
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f
->path
, strerror(-r
));
215 acl
= acl_get_fd(f
->fd
);
217 log_warning("Failed to read ACL on %s, ignoring: %m", f
->path
);
/* Look for an existing ACL entry for this uid ... */
221 r
= acl_find_uid(acl
, uid
, &entry
);
/* ... and create an ACL_USER entry qualified with the uid otherwise. */
224 if (acl_create_entry(&acl
, &entry
) < 0 ||
225 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
226 acl_set_qualifier(entry
, &uid
) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
/* Add read permission to the entry and recompute the ACL mask. */
232 if (acl_get_permset(entry
, &permset
) < 0 ||
233 acl_add_perm(permset
, ACL_READ
) < 0 ||
234 acl_calc_mask(&acl
) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
/* Write the modified ACL back to the file. */
239 if (acl_set_fd(f
->fd
, acl
) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f
->path
);
/* Picks the journal file an entry for the given uid is written to.
 *
 * The runtime journal is returned whenever it is open. Otherwise a per-user
 * file /var/log/journal/<machine-id>/user-<uid>.journal is looked up in (or
 * opened and added to) s->user_journals. On the failure paths visible here the
 * system journal is returned as fallback. */
247 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
260 if (s
->runtime_journal
)
261 return s
->runtime_journal
;
264 return s
->system_journal
;
266 r
= sd_id128_get_machine(&machine
);
268 return s
->system_journal
;
/* Per-user files already open are cached in a hashmap keyed by uid. */
270 f
= hashmap_get(s
->user_journals
, UINT32_TO_PTR(uid
));
274 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine
), (unsigned long) uid
) < 0)
276 return s
->system_journal
;
278 while (hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
279 /* Too many open? Then let's close one */
280 f
= hashmap_steal_first(s
->user_journals
);
282 journal_file_close(f
);
/* Open (creating if needed) the per-user file; the system journal is passed
 * along as an extra argument to the open call. */
285 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, s
->system_journal
, &f
);
289 return s
->system_journal
;
/* Give the owning user read access to the file. */
291 server_fix_perms(s
, f
, uid
);
293 r
= hashmap_put(s
->user_journals
, UINT32_TO_PTR(uid
), f
);
/* A file that is not kept in the hashmap is closed again. */
295 journal_file_close(f
);
296 return s
->system_journal
;
/* Rotates every journal file the server has open: the runtime journal, the
 * system journal and each entry of s->user_journals. Failures are logged with
 * log_error(); server_fix_perms() is applied to the files afterwards. */
302 void server_rotate(Server
*s
) {
308 log_debug("Rotating...");
/* Runtime journal: rotated with sealing disabled (last argument false). */
310 if (s
->runtime_journal
) {
311 r
= journal_file_rotate(&s
->runtime_journal
, s
->compress
, false);
313 if (s
->runtime_journal
)
314 log_error("Failed to rotate %s: %s", s
->runtime_journal
->path
, strerror(-r
));
316 log_error("Failed to create new runtime journal: %s", strerror(-r
));
318 server_fix_perms(s
, s
->runtime_journal
, 0);
/* System journal: rotated with the configured seal setting. */
321 if (s
->system_journal
) {
322 r
= journal_file_rotate(&s
->system_journal
, s
->compress
, s
->seal
);
324 if (s
->system_journal
)
325 log_error("Failed to rotate %s: %s", s
->system_journal
->path
, strerror(-r
));
327 log_error("Failed to create new system journal: %s", strerror(-r
));
330 server_fix_perms(s
, s
->system_journal
, 0);
/* Per-user journals: the hashmap key k carries the uid. */
333 HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
334 r
= journal_file_rotate(&f
, s
->compress
, s
->seal
);
337 log_error("Failed to rotate %s: %s", f
->path
, strerror(-r
));
339 log_error("Failed to create user journal: %s", strerror(-r
));
/* Store the (possibly new) file object under the same key. */
341 hashmap_replace(s
->user_journals
, k
, f
);
342 server_fix_perms(s
, f
, PTR_TO_UINT32(k
));
/* Vacuums the per-machine journal directories under /var/log/journal and
 * /run/log/journal (each only if the corresponding journal is open), using
 * the respective metrics and s->max_retention_usec. s->oldest_file_usec is
 * reset first and passed to the vacuum calls; the cached available-space
 * timestamp is zeroed at the end so that the next query recomputes it. */
347 void server_vacuum(Server
*s
) {
353 log_debug("Vacuuming...");
355 s
->oldest_file_usec
= 0;
357 r
= sd_id128_get_machine(&machine
);
359 log_error("Failed to get machine ID: %s", strerror(-r
));
363 sd_id128_to_string(machine
, ids
);
365 if (s
->system_journal
) {
366 p
= strappend("/var/log/journal/", ids
);
372 r
= journal_directory_vacuum(p
, s
->system_metrics
.max_use
, s
->system_metrics
.keep_free
, s
->max_retention_usec
, &s
->oldest_file_usec
);
/* A missing directory (-ENOENT) is not reported. */
373 if (r
< 0 && r
!= -ENOENT
)
374 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
378 if (s
->runtime_journal
) {
379 p
= strappend("/run/log/journal/", ids
);
385 r
= journal_directory_vacuum(p
, s
->runtime_metrics
.max_use
, s
->runtime_metrics
.keep_free
, s
->max_retention_usec
, &s
->oldest_file_usec
);
386 if (r
< 0 && r
!= -ENOENT
)
387 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
/* Invalidate the available_space() cache. */
391 s
->cached_available_space_timestamp
= 0;
/* Returns a newly allocated copy of the cgroup path of `pid` with the cgroup
 * path of PID 1 stripped from its front, when the former starts with the
 * latter. Both paths are queried for SYSTEMD_CGROUP_CONTROLLER. */
394 static char *shortened_cgroup_path(pid_t pid
) {
396 char _cleanup_free_
*process_path
= NULL
, *init_path
= NULL
;
401 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, pid
, &process_path
);
405 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, 1, &init_path
);
/* Normalise the init path: a trailing "/system" (7 characters) is cut off;
 * the root path "/" is handled as a separate case. */
409 if (endswith(init_path
, "/system"))
410 init_path
[strlen(init_path
) - 7] = 0;
411 else if (streq(init_path
, "/"))
/* Keep only the part of the process path that follows the init prefix. */
414 if (startswith(process_path
, init_path
)) {
415 path
= strdup(process_path
+ strlen(init_path
));
424 bool shall_try_append_again(JournalFile
*f
, int r
) {
426 /* -E2BIG Hit configured limit
428 -EDQUOT Quota limit hit
430 -EHOSTDOWN Other machine
431 -EBUSY Unclean shutdown
432 -EPROTONOSUPPORT Unsupported feature
435 -ESHUTDOWN Already archived */
437 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== -EDQUOT
|| r
== -ENOSPC
)
438 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
439 else if (r
== -EHOSTDOWN
)
440 log_info("%s: Journal file from other machine, rotating.", f
->path
);
441 else if (r
== -EBUSY
)
442 log_info("%s: Unclean shutdown, rotating.", f
->path
);
443 else if (r
== -EPROTONOSUPPORT
)
444 log_info("%s: Unsupported feature, rotating.", f
->path
);
445 else if (r
== -EBADMSG
|| r
== -ENODATA
|| r
== ESHUTDOWN
)
446 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
/* Appends one entry, given as `n` iovec fields, to the journal file selected
 * for `uid` by find_journal().
 *
 * The file is looked up again after a suggested rotation and again before the
 * retry. When the first append fails, shall_try_append_again() and the
 * `vacuumed` flag decide between giving up (error logged, entry ignored) and
 * one retry. */
453 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
) {
455 bool vacuumed
= false;
462 f
= find_journal(s
, uid
);
/* Rotate up front when the file itself suggests it. */
466 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
467 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
472 f
= find_journal(s
, uid
);
/* First write attempt. */
477 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
481 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
482 log_error("Failed to write entry, ignoring: %s", strerror(-r
));
489 f
= find_journal(s
, uid
);
/* Second and last attempt. */
493 log_debug("Retrying write.");
494 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
496 log_error("Failed to write entry, ignoring: %s", strerror(-r
));
499 static void dispatch_message_real(
501 struct iovec
*iovec
, unsigned n
, unsigned m
,
504 const char *label
, size_t label_len
,
505 const char *unit_id
) {
507 char _cleanup_free_
*pid
= NULL
, *uid
= NULL
, *gid
= NULL
,
508 *source_time
= NULL
, *boot_id
= NULL
, *machine_id
= NULL
,
509 *comm
= NULL
, *cmdline
= NULL
, *hostname
= NULL
,
510 *audit_session
= NULL
, *audit_loginuid
= NULL
,
511 *exe
= NULL
, *cgroup
= NULL
, *session
= NULL
,
512 *owner_uid
= NULL
, *unit
= NULL
, *selinux_context
= NULL
;
518 uid_t realuid
= 0, owner
= 0, journal_uid
;
519 bool owner_valid
= false;
524 assert(n
+ N_IOVEC_META_FIELDS
<= m
);
530 realuid
= ucred
->uid
;
532 if (asprintf(&pid
, "_PID=%lu", (unsigned long) ucred
->pid
) >= 0)
533 IOVEC_SET_STRING(iovec
[n
++], pid
);
535 if (asprintf(&uid
, "_UID=%lu", (unsigned long) ucred
->uid
) >= 0)
536 IOVEC_SET_STRING(iovec
[n
++], uid
);
538 if (asprintf(&gid
, "_GID=%lu", (unsigned long) ucred
->gid
) >= 0)
539 IOVEC_SET_STRING(iovec
[n
++], gid
);
541 r
= get_process_comm(ucred
->pid
, &t
);
543 comm
= strappend("_COMM=", t
);
547 IOVEC_SET_STRING(iovec
[n
++], comm
);
550 r
= get_process_exe(ucred
->pid
, &t
);
552 exe
= strappend("_EXE=", t
);
556 IOVEC_SET_STRING(iovec
[n
++], exe
);
559 r
= get_process_cmdline(ucred
->pid
, 0, false, &t
);
561 cmdline
= strappend("_CMDLINE=", t
);
565 IOVEC_SET_STRING(iovec
[n
++], cmdline
);
568 r
= audit_session_from_pid(ucred
->pid
, &audit
);
570 if (asprintf(&audit_session
, "_AUDIT_SESSION=%lu", (unsigned long) audit
) >= 0)
571 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
573 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
575 if (asprintf(&audit_loginuid
, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid
) >= 0)
576 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
578 t
= shortened_cgroup_path(ucred
->pid
);
580 cgroup
= strappend("_SYSTEMD_CGROUP=", t
);
584 IOVEC_SET_STRING(iovec
[n
++], cgroup
);
588 if (sd_pid_get_session(ucred
->pid
, &t
) >= 0) {
589 session
= strappend("_SYSTEMD_SESSION=", t
);
593 IOVEC_SET_STRING(iovec
[n
++], session
);
596 if (sd_pid_get_owner_uid(ucred
->uid
, &owner
) >= 0) {
598 if (asprintf(&owner_uid
, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner
) >= 0)
599 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
603 if (cg_pid_get_unit(ucred
->pid
, &t
) >= 0) {
604 unit
= strappend("_SYSTEMD_UNIT=", t
);
606 } else if (cg_pid_get_user_unit(ucred
->pid
, &t
) >= 0) {
607 unit
= strappend("_SYSTEMD_USER_UNIT=", t
);
609 } else if (unit_id
) {
611 unit
= strappend("_SYSTEMD_USER_UNIT=", unit_id
);
613 unit
= strappend("_SYSTEMD_UNIT=", unit_id
);
617 IOVEC_SET_STRING(iovec
[n
++], unit
);
621 selinux_context
= malloc(sizeof("_SELINUX_CONTEXT=") + label_len
);
622 if (selinux_context
) {
623 memcpy(selinux_context
, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
624 memcpy(selinux_context
+sizeof("_SELINUX_CONTEXT=")-1, label
, label_len
);
625 selinux_context
[sizeof("_SELINUX_CONTEXT=")-1+label_len
] = 0;
626 IOVEC_SET_STRING(iovec
[n
++], selinux_context
);
629 security_context_t con
;
631 if (getpidcon(ucred
->pid
, &con
) >= 0) {
632 selinux_context
= strappend("_SELINUX_CONTEXT=", con
);
634 IOVEC_SET_STRING(iovec
[n
++], selinux_context
);
643 if (asprintf(&source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu",
644 (unsigned long long) timeval_load(tv
)) >= 0)
645 IOVEC_SET_STRING(iovec
[n
++], source_time
);
648 /* Note that strictly speaking storing the boot id here is
649 * redundant since the entry includes this in-line
650 * anyway. However, we need this indexed, too. */
651 r
= sd_id128_get_boot(&id
);
653 if (asprintf(&boot_id
, "_BOOT_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
654 IOVEC_SET_STRING(iovec
[n
++], boot_id
);
656 r
= sd_id128_get_machine(&id
);
658 if (asprintf(&machine_id
, "_MACHINE_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
659 IOVEC_SET_STRING(iovec
[n
++], machine_id
);
661 t
= gethostname_malloc();
663 hostname
= strappend("_HOSTNAME=", t
);
666 IOVEC_SET_STRING(iovec
[n
++], hostname
);
671 if (s
->split_mode
== SPLIT_UID
&& realuid
> 0)
672 /* Split up strictly by any UID */
673 journal_uid
= realuid
;
674 else if (s
->split_mode
== SPLIT_LOGIN
&& realuid
> 0 && owner_valid
&& owner
> 0)
675 /* Split up by login UIDs, this avoids creation of
676 * individual journals for system UIDs. We do this
677 * only if the realuid is not root, in order not to
678 * accidentally leak privileged information to the
679 * user that is logged by a privileged process that is
680 * part of an unprivileged session.*/
685 write_to_journal(s
, journal_uid
, iovec
, n
);
/* Emits a message generated by the journal daemon itself.
 *
 *   s           server
 *   message_id  128-bit message id; no id field is added for SD_ID128_NULL
 *   format,...  printf-style text of the message
 *
 * The entry carries PRIORITY=6, _TRANSPORT=driver and the formatted MESSAGE=
 * text, and is dispatched with the daemon's own pid/uid/gid as credentials. */
688 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
689 char mid
[11 + 32 + 1];
690 char buffer
[16 + LINE_MAX
+ 1];
/* Room for the four fields built here plus the metadata fields that
 * dispatch_message_real() is given capacity for. */
691 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 4];
699 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
700 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
/* "MESSAGE=" occupies the first 8 bytes; the formatted text follows it. */
702 memcpy(buffer
, "MESSAGE=", 8);
703 va_start(ap
, format
);
704 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
706 char_array_0(buffer
);
707 IOVEC_SET_STRING(iovec
[n
++], buffer
);
709 if (!sd_id128_equal(message_id
, SD_ID128_NULL
)) {
710 snprintf(mid
, sizeof(mid
), MESSAGE_ID(message_id
));
712 IOVEC_SET_STRING(iovec
[n
++], mid
);
/* The daemon reports itself as the sender. */
716 ucred
.pid
= getpid();
717 ucred
.uid
= getuid();
718 ucred
.gid
= getgid();
720 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
);
/* Entry point for messages received from clients: applies the storage level
 * filter and per-cgroup rate limiting, then hands the message on to
 * dispatch_message_real().
 *
 * Messages whose priority level exceeds s->max_level_store are filtered. The
 * rate limit key is the sender's shortened cgroup path, truncated as described
 * in the comment below; available_space() is passed to the rate limiter as
 * well. When the limiter reports suppressed messages, a driver message with
 * their count is written before the message itself is dispatched. */
723 void server_dispatch_message(
725 struct iovec
*iovec
, unsigned n
, unsigned m
,
728 const char *label
, size_t label_len
,
733 char _cleanup_free_
*path
= NULL
;
737 assert(iovec
|| n
== 0);
742 if (LOG_PRI(priority
) > s
->max_level_store
)
748 path
= shortened_cgroup_path(ucred
->pid
);
752 /* example: /user/lennart/3/foobar
753 * /system/dbus.service/foobar
755 * So let's cut off everything past the third /, since that is
756 * where user directories start */
758 c
= strchr(path
, '/');
760 c
= strchr(c
+1, '/');
762 c
= strchr(c
+1, '/');
768 rl
= journal_rate_limit_test(s
->rate_limit
, path
,
769 priority
& LOG_PRIMASK
, available_space(s
));
774 /* Write a suppression message if we suppressed something */
776 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
,
777 "Suppressed %u messages from %s", rl
- 1, path
);
780 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
);
/* Opens the journal files the server writes to, as far as they are not open
 * yet:
 *
 *  - the persistent system journal /var/log/journal/<machine-id>/system.journal,
 *    only for persistent/auto storage and only once the flag file
 *    /run/systemd/journal/flushed exists;
 *  - the runtime journal /run/log/journal/<machine-id>/system.journal, unless
 *    storage is "none".
 *
 * After a successful open the file permissions are fixed and a driver message
 * stating the size limit is written. */
784 static int system_journal_open(Server
*s
) {
790 r
= sd_id128_get_machine(&machine
);
794 sd_id128_to_string(machine
, ids
);
796 if (!s
->system_journal
&&
797 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
798 access("/run/systemd/journal/flushed", F_OK
) >= 0) {
800 /* If in auto mode: first try to create the machine
801 * path, but not the prefix.
803 * If in persistent mode: create /var/log/journal and
804 * the machine path */
806 if (s
->storage
== STORAGE_PERSISTENT
)
807 (void) mkdir("/var/log/journal/", 0755);
809 fn
= strappend("/var/log/journal/", ids
);
813 (void) mkdir(fn
, 0755);
816 fn
= strjoin("/var/log/journal/", ids
, "/system.journal", NULL
);
820 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
824 char fb
[FORMAT_BYTES_MAX
];
826 server_fix_perms(s
, s
->system_journal
, 0);
827 server_driver_message(s
, SD_ID128_NULL
, "Allowing system journal files to grow to %s.",
828 format_bytes(fb
, sizeof(fb
), s
->system_metrics
.max_use
));
/* A missing directory or a read-only file system is not worth a warning. */
832 if (r
!= -ENOENT
&& r
!= -EROFS
)
833 log_warning("Failed to open system journal: %s", strerror(-r
));
839 if (!s
->runtime_journal
&&
840 (s
->storage
!= STORAGE_NONE
)) {
842 fn
= strjoin("/run/log/journal/", ids
, "/system.journal", NULL
);
846 if (s
->system_journal
) {
848 /* Try to open the runtime journal, but only
849 * if it already exists, so that we can flush
850 * it into the system journal */
852 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
857 log_warning("Failed to open runtime journal: %s", strerror(-r
));
864 /* OK, we really need the runtime journal, so create
865 * it if necessary. */
867 (void) mkdir_parents(fn
, 0755);
868 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
872 log_error("Failed to open runtime journal: %s", strerror(-r
));
877 if (s
->runtime_journal
) {
878 char fb
[FORMAT_BYTES_MAX
];
880 server_fix_perms(s
, s
->runtime_journal
, 0);
881 server_driver_message(s
, SD_ID128_NULL
, "Allowing runtime journal files to grow to %s.",
882 format_bytes(fb
, sizeof(fb
), s
->runtime_metrics
.max_use
));
/* Copies the entries of the runtime journal into the system journal and then
 * retires the runtime journal.
 *
 * Applies only to auto/persistent storage with an open runtime journal, and
 * needs the system journal to be open after system_journal_open() has been
 * tried. Entries are read through an sd_journal handle restricted to runtime
 * files and copied one by one; a failed copy is judged by
 * shall_try_append_again() and may be retried once. Afterwards the runtime
 * journal is closed and /run/log/journal is removed. */
889 int server_flush_to_var(Server
*s
) {
892 sd_journal
*j
= NULL
;
896 if (s
->storage
!= STORAGE_AUTO
&&
897 s
->storage
!= STORAGE_PERSISTENT
)
900 if (!s
->runtime_journal
)
903 system_journal_open(s
);
905 if (!s
->system_journal
)
908 log_debug("Flushing to /var...");
910 r
= sd_id128_get_machine(&machine
);
912 log_error("Failed to get machine id: %s", strerror(-r
));
916 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
918 log_error("Failed to read runtime journal: %s", strerror(-r
));
/* A data threshold of 0 is set on the reader handle. */
922 sd_journal_set_data_threshold(j
, 0);
924 SD_JOURNAL_FOREACH(j
) {
929 assert(f
&& f
->current_offset
> 0);
/* Locate the entry object at the reader's current offset ... */
931 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
933 log_error("Can't read entry: %s", strerror(-r
));
/* ... and copy it into the system journal. */
937 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
941 if (!shall_try_append_again(s
->system_journal
, r
)) {
942 log_error("Can't write entry: %s", strerror(-r
));
949 log_debug("Retrying write.");
950 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
952 log_error("Can't write entry: %s", strerror(-r
));
958 journal_file_post_change(s
->system_journal
);
/* The runtime journal is no longer needed. */
960 journal_file_close(s
->runtime_journal
);
961 s
->runtime_journal
= NULL
;
964 rm_rf("/run/log/journal", false, true, false);
/* Handles one epoll event, dispatching on the descriptor it refers to:
 *
 *  - s->signal_fd:             read a signalfd record and act on the signal
 *  - s->dev_kmsg_fd:           read kernel messages
 *  - s->native_fd/syslog_fd:   receive a datagram with its control data and
 *                              pass it to the native or syslog processing
 *  - s->stdout_fd:             create a new stdout stream
 *  - anything else:            the event data is taken as a StdoutStream
 *                              pointer and the stream is processed
 *
 * Unexpected event masks are reported with "Got invalid event from epoll.". */
972 int process_event(Server
*s
, struct epoll_event
*ev
) {
976 if (ev
->data
.fd
== s
->signal_fd
) {
977 struct signalfd_siginfo sfsi
;
980 if (ev
->events
!= EPOLLIN
) {
981 log_error("Got invalid event from epoll.");
/* One complete signalfd_siginfo record is expected per read. */
985 n
= read(s
->signal_fd
, &sfsi
, sizeof(sfsi
));
986 if (n
!= sizeof(sfsi
)) {
991 if (errno
== EINTR
|| errno
== EAGAIN
)
997 log_info("Received SIG%s", signal_to_string(sfsi
.ssi_signo
));
/* SIGUSR1: create the "flushed" flag file and flush the runtime journal to /var. */
999 if (sfsi
.ssi_signo
== SIGUSR1
) {
1000 touch("/run/systemd/journal/flushed");
1001 server_flush_to_var(s
);
1005 if (sfsi
.ssi_signo
== SIGUSR2
) {
1013 } else if (ev
->data
.fd
== s
->dev_kmsg_fd
) {
1016 if (ev
->events
!= EPOLLIN
) {
1017 log_error("Got invalid event from epoll.");
1021 r
= server_read_dev_kmsg(s
);
1027 } else if (ev
->data
.fd
== s
->native_fd
||
1028 ev
->data
.fd
== s
->syslog_fd
) {
1030 if (ev
->events
!= EPOLLIN
) {
1031 log_error("Got invalid event from epoll.");
1036 struct msghdr msghdr
;
1038 struct ucred
*ucred
= NULL
;
1039 struct timeval
*tv
= NULL
;
1040 struct cmsghdr
*cmsg
;
1042 size_t label_len
= 0;
1044 struct cmsghdr cmsghdr
;
1046 /* We use NAME_MAX space for the
1047 * SELinux label here. The kernel
1048 * currently enforces no limit, but
1049 * according to suggestions from the
1050 * SELinux people this will change and
1051 * it will probably be identical to
1052 * NAME_MAX. For now we use that, but
1053 * this should be updated one day when
1054 * the final limit is known.*/
1055 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1056 CMSG_SPACE(sizeof(struct timeval
)) +
1057 CMSG_SPACE(sizeof(int)) + /* fd */
1058 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
/* Ask the socket how many bytes are pending so that the receive buffer can be
 * enlarged before the datagram is read. */
1065 if (ioctl(ev
->data
.fd
, SIOCINQ
, &v
) < 0) {
1066 log_error("SIOCINQ failed: %m");
1070 if (s
->buffer_size
< (size_t) v
) {
/* Grow to at least LINE_MAX + pending bytes, or double the current size;
 * one extra byte is allocated on top of the chosen size. */
1074 l
= MAX(LINE_MAX
+ (size_t) v
, s
->buffer_size
* 2);
1075 b
= realloc(s
->buffer
, l
+1);
1078 log_error("Couldn't increase buffer.");
1087 iovec
.iov_base
= s
->buffer
;
1088 iovec
.iov_len
= s
->buffer_size
;
1092 msghdr
.msg_iov
= &iovec
;
1093 msghdr
.msg_iovlen
= 1;
1094 msghdr
.msg_control
= &control
;
1095 msghdr
.msg_controllen
= sizeof(control
);
1097 n
= recvmsg(ev
->data
.fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1100 if (errno
== EINTR
|| errno
== EAGAIN
)
1103 log_error("recvmsg() failed: %m");
/* Pick sender credentials, security label, timestamp and passed file
 * descriptors out of the control messages. */
1107 for (cmsg
= CMSG_FIRSTHDR(&msghdr
); cmsg
; cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
)) {
1109 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1110 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1111 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1112 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1113 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1114 cmsg
->cmsg_type
== SCM_SECURITY
) {
1115 label
= (char*) CMSG_DATA(cmsg
);
1116 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1117 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1118 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1119 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1120 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1121 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1122 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1123 fds
= (int*) CMSG_DATA(cmsg
);
1124 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
/* Syslog socket: payload only; file descriptors are not accepted here. */
1128 if (ev
->data
.fd
== s
->syslog_fd
) {
1131 if (n
> 0 && n_fds
== 0) {
1132 e
= memchr(s
->buffer
, '\n', n
);
1138 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1139 } else if (n_fds
> 0)
1140 log_warning("Got file descriptors via syslog socket. Ignoring.");
/* Native socket: either an inline payload, or exactly one file descriptor
 * with an empty payload. */
1143 if (n
> 0 && n_fds
== 0)
1144 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1145 else if (n
== 0 && n_fds
== 1)
1146 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1148 log_warning("Got too many file descriptors via native socket. Ignoring.");
/* Received descriptors are closed here. */
1151 close_many(fds
, n_fds
);
1156 } else if (ev
->data
.fd
== s
->stdout_fd
) {
1158 if (ev
->events
!= EPOLLIN
) {
1159 log_error("Got invalid event from epoll.");
1163 stdout_stream_new(s
);
1167 StdoutStream
*stream
;
/* Only EPOLLIN and EPOLLHUP bits are acceptable for a stream. */
1169 if ((ev
->events
|EPOLLIN
|EPOLLHUP
) != (EPOLLIN
|EPOLLHUP
)) {
1170 log_error("Got invalid event from epoll.");
1174 /* If it is none of the well-known fds, it must be an
1175 * stdout stream fd. Note that this is a bit ugly here
1176 * (since we rely that none of the well-known fds
1177 * could be interpreted as pointer), but nonetheless
1178 * safe, since the well-known fds would never get an
1179 * fd > 4096, i.e. beyond the first memory page */
1181 stream
= ev
->data
.ptr
;
/* A result <= 0 from processing ends the stream. */
1183 if (stdout_stream_process(stream
) <= 0)
1184 stdout_stream_free(stream
);
1189 log_error("Unknown event.");
/* Installs the process signal mask {SIGINT, SIGTERM, SIGUSR1, SIGUSR2},
 * creates a non-blocking, close-on-exec signalfd for that set in s->signal_fd
 * and registers it with the server's epoll instance for EPOLLIN. */
1193 static int open_signalfd(Server
*s
) {
1195 struct epoll_event ev
;
1199 assert_se(sigemptyset(&mask
) == 0);
1200 sigset_add_many(&mask
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, -1);
/* SIG_SETMASK replaces the whole signal mask of the process with this set. */
1201 assert_se(sigprocmask(SIG_SETMASK
, &mask
, NULL
) == 0);
1203 s
->signal_fd
= signalfd(-1, &mask
, SFD_NONBLOCK
|SFD_CLOEXEC
);
1204 if (s
->signal_fd
< 0) {
1205 log_error("signalfd(): %m");
1210 ev
.events
= EPOLLIN
;
1211 ev
.data
.fd
= s
->signal_fd
;
1213 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->signal_fd
, &ev
) < 0) {
1214 log_error("epoll_ctl(): %m");
/* Applies journald switches given on the kernel command line.
 *
 * /proc/cmdline is read as one line and split into (possibly quoted) words.
 * The boolean switches systemd.journald.forward_to_syslog=,
 * systemd.journald.forward_to_kmsg= and systemd.journald.forward_to_console=
 * set the corresponding Server fields; any other word starting with
 * "systemd.journald" is reported as invalid. The container check at the top
 * guards the whole procedure. */
1221 static int server_parse_proc_cmdline(Server
*s
) {
1222 char _cleanup_free_
*line
= NULL
;
1227 if (detect_container(NULL
) > 0)
1230 r
= read_one_line_file("/proc/cmdline", &line
);
1232 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r
));
1236 FOREACH_WORD_QUOTED(w
, l
, line
, state
) {
1237 char _cleanup_free_
*word
;
1239 word
= strndup(w
, l
);
/* The numeric offsets below (35, 33, 36) are the lengths of the respective
 * option prefixes, i.e. they point at the value after '='. */
1243 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1244 r
= parse_boolean(word
+ 35);
1246 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1248 s
->forward_to_syslog
= r
;
1249 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1250 r
= parse_boolean(word
+ 33);
1252 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1254 s
->forward_to_kmsg
= r
;
1255 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1256 r
= parse_boolean(word
+ 36);
1258 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1260 s
->forward_to_console
= r
;
1261 } else if (startswith(word
, "systemd.journald"))
1262 log_warning("Invalid systemd.journald parameter. Ignoring.");
/* Parses /etc/systemd/journald.conf into the Server.
 *
 * The file is handed to config_parse() with the section list "Journal" and
 * the gperf-generated lookup table journald_gperf_lookup. A missing file
 * (ENOENT) is distinguished from other open errors, which are logged as
 * warnings; parse failures are logged as warnings too. */
1268 static int server_parse_config_file(Server
*s
) {
1269 static const char *fn
= "/etc/systemd/journald.conf";
1270 FILE _cleanup_fclose_
*f
= NULL
;
/* "e" in the mode string requests close-on-exec for the stream's descriptor. */
1275 f
= fopen(fn
, "re");
1277 if (errno
== ENOENT
)
1280 log_warning("Failed to open configuration file %s: %m", fn
);
1284 r
= config_parse(fn
, f
, "Journal\0", config_item_perf_lookup
,
1285 (void*) journald_gperf_lookup
, false, s
);
1287 log_warning("Failed to parse configuration file: %s", strerror(-r
));
/* Initialises a Server object: sets defaults, reads the configuration file
 * and the kernel command line, allocates the user journal hashmap, the mmap
 * cache and the epoll instance, takes over sockets passed in by the service
 * manager, opens the remaining sockets and event sources, sets up rate
 * limiting and finally opens the journal files. */
1292 int server_init(Server
*s
) {
/* All descriptors start out as "not open". */
1298 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->signal_fd
= s
->epoll_fd
= s
->dev_kmsg_fd
= -1;
1302 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1303 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1305 s
->forward_to_syslog
= true;
1307 s
->max_level_store
= LOG_DEBUG
;
1308 s
->max_level_syslog
= LOG_DEBUG
;
1309 s
->max_level_kmsg
= LOG_NOTICE
;
1310 s
->max_level_console
= LOG_INFO
;
/* Every byte of both metrics structures is set to 0xFF before the
 * configuration is parsed -- presumably marking the fields as "unset";
 * confirm against the metrics consumers. */
1312 memset(&s
->system_metrics
, 0xFF, sizeof(s
->system_metrics
));
1313 memset(&s
->runtime_metrics
, 0xFF, sizeof(s
->runtime_metrics
));
1315 server_parse_config_file(s
);
1316 server_parse_proc_cmdline(s
);
/* If exactly one of interval/burst is zero, both are set to zero. */
1317 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1318 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1319 (long long unsigned) s
->rate_limit_interval
,
1320 s
->rate_limit_burst
);
1321 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1324 mkdir_p("/run/systemd/journal", 0755);
1326 s
->user_journals
= hashmap_new(trivial_hash_func
, trivial_compare_func
);
1327 if (!s
->user_journals
)
1330 s
->mmap
= mmap_cache_new();
1334 s
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
1335 if (s
->epoll_fd
< 0) {
1336 log_error("Failed to create epoll object: %m");
/* Sockets handed over by the service manager: each is classified by socket
 * type and path; more than one socket of a kind is an error. */
1340 n
= sd_listen_fds(true);
1342 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n
));
1346 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1348 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1350 if (s
->native_fd
>= 0) {
1351 log_error("Too many native sockets passed.");
1357 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1359 if (s
->stdout_fd
>= 0) {
1360 log_error("Too many stdout sockets passed.");
1366 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0) {
1368 if (s
->syslog_fd
>= 0) {
1369 log_error("Too many /dev/log sockets passed.");
1376 log_error("Unknown socket passed.");
/* Open the sockets and the other event sources. */
1381 r
= server_open_syslog_socket(s
);
1385 r
= server_open_native_socket(s
);
1389 r
= server_open_stdout_socket(s
);
1393 r
= server_open_dev_kmsg(s
);
1397 r
= server_open_kernel_seqnum(s
);
1401 r
= open_signalfd(s
);
1405 s
->udev
= udev_new();
1409 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
,
1410 s
->rate_limit_burst
);
1414 r
= system_journal_open(s
);
/* Calls journal_file_maybe_append_tag() with the current realtime timestamp
 * on the system journal (if open) and on every per-user journal file. */
1421 void server_maybe_append_tags(Server
*s
) {
/* One timestamp is taken and shared by all files. */
1427 n
= now(CLOCK_REALTIME
);
1429 if (s
->system_journal
)
1430 journal_file_maybe_append_tag(s
->system_journal
, n
);
1432 HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1433 journal_file_maybe_append_tag(f
, n
);
/* Releases what the Server holds: stdout streams, the system, runtime and
 * per-user journal files, the user journal hashmap, all open descriptors, the
 * rate limiter, the kernel sequence number mapping, the mmap cache and the
 * udev context. Descriptors are closed only when they are >= 0. */
1437 void server_done(Server
*s
) {
/* Freeing a stream is expected to unlink it from s->stdout_streams, which is
 * what lets this loop end -- confirm against stdout_stream_free(). */
1441 while (s
->stdout_streams
)
1442 stdout_stream_free(s
->stdout_streams
);
1444 if (s
->system_journal
)
1445 journal_file_close(s
->system_journal
);
1447 if (s
->runtime_journal
)
1448 journal_file_close(s
->runtime_journal
);
/* Drain the hashmap, closing each per-user file, before freeing the map. */
1450 while ((f
= hashmap_steal_first(s
->user_journals
)))
1451 journal_file_close(f
);
1453 hashmap_free(s
->user_journals
);
1455 if (s
->epoll_fd
>= 0)
1456 close_nointr_nofail(s
->epoll_fd
);
1458 if (s
->signal_fd
>= 0)
1459 close_nointr_nofail(s
->signal_fd
);
1461 if (s
->syslog_fd
>= 0)
1462 close_nointr_nofail(s
->syslog_fd
);
1464 if (s
->native_fd
>= 0)
1465 close_nointr_nofail(s
->native_fd
);
1467 if (s
->stdout_fd
>= 0)
1468 close_nointr_nofail(s
->stdout_fd
);
1470 if (s
->dev_kmsg_fd
>= 0)
1471 close_nointr_nofail(s
->dev_kmsg_fd
);
1474 journal_rate_limit_free(s
->rate_limit
);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes. */
1476 if (s
->kernel_seqnum
)
1477 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1483 mmap_cache_unref(s
->mmap
);
1486 udev_unref(s
->udev
);