1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
40 #include <systemd/sd-login.h>
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
51 #include "conf-parser.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journal-authenticate.h"
56 #include "journald-rate-limit.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61 #include "journald-native.h"
65 #include <acl/libacl.h>
70 #include <selinux/selinux.h>
73 #define USER_JOURNALS_MAX 1024
75 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 200
78 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
80 static const char* const storage_table
[] = {
81 [STORAGE_AUTO
] = "auto",
82 [STORAGE_VOLATILE
] = "volatile",
83 [STORAGE_PERSISTENT
] = "persistent",
84 [STORAGE_NONE
] = "none"
87 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
88 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
90 static uint64_t available_space(Server
*s
) {
95 uint64_t sum
= 0, avail
= 0, ss_avail
= 0;
101 ts
= now(CLOCK_MONOTONIC
);
103 if (s
->cached_available_space_timestamp
+ RECHECK_AVAILABLE_SPACE_USEC
> ts
)
104 return s
->cached_available_space
;
106 r
= sd_id128_get_machine(&machine
);
110 if (s
->system_journal
) {
111 f
= "/var/log/journal/";
112 m
= &s
->system_metrics
;
114 f
= "/run/log/journal/";
115 m
= &s
->runtime_metrics
;
120 p
= strappend(f
, sd_id128_to_string(machine
, ids
));
130 if (fstatvfs(dirfd(d
), &ss
) < 0)
135 struct dirent buf
, *de
;
137 r
= readdir_r(d
, &buf
, &de
);
144 if (!endswith(de
->d_name
, ".journal") &&
145 !endswith(de
->d_name
, ".journal~"))
148 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
151 if (!S_ISREG(st
.st_mode
))
154 sum
+= (uint64_t) st
.st_blocks
* 512UL;
157 avail
= sum
>= m
->max_use
? 0 : m
->max_use
- sum
;
159 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
161 ss_avail
= ss_avail
< m
->keep_free
? 0 : ss_avail
- m
->keep_free
;
163 if (ss_avail
< avail
)
166 s
->cached_available_space
= avail
;
167 s
->cached_available_space_timestamp
= ts
;
175 static void server_read_file_gid(Server
*s
) {
176 const char *adm
= "adm";
181 if (s
->file_gid_valid
)
184 r
= get_group_creds(&adm
, &s
->file_gid
);
186 log_warning("Failed to resolve 'adm' group: %s", strerror(-r
));
188 /* if we couldn't read the gid, then it will be 0, but that's
189 * fine and we shouldn't try to resolve the group again, so
190 * let's just pretend it worked right-away. */
191 s
->file_gid_valid
= true;
194 static void server_fix_perms(Server
*s
, JournalFile
*f
, uid_t uid
) {
199 acl_permset_t permset
;
204 server_read_file_gid(s
);
206 r
= fchmod_and_fchown(f
->fd
, 0640, 0, s
->file_gid
);
208 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f
->path
, strerror(-r
));
214 acl
= acl_get_fd(f
->fd
);
216 log_warning("Failed to read ACL on %s, ignoring: %m", f
->path
);
220 r
= acl_find_uid(acl
, uid
, &entry
);
223 if (acl_create_entry(&acl
, &entry
) < 0 ||
224 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
225 acl_set_qualifier(entry
, &uid
) < 0) {
226 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
231 if (acl_get_permset(entry
, &permset
) < 0 ||
232 acl_add_perm(permset
, ACL_READ
) < 0 ||
233 acl_calc_mask(&acl
) < 0) {
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
238 if (acl_set_fd(f
->fd
, acl
) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f
->path
);
246 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
259 if (s
->runtime_journal
)
260 return s
->runtime_journal
;
263 return s
->system_journal
;
265 r
= sd_id128_get_machine(&machine
);
267 return s
->system_journal
;
269 f
= hashmap_get(s
->user_journals
, UINT32_TO_PTR(uid
));
273 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-%lu.journal",
274 SD_ID128_FORMAT_VAL(machine
), (unsigned long) uid
) < 0)
275 return s
->system_journal
;
277 while (hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
278 /* Too many open? Then let's close one */
279 f
= hashmap_steal_first(s
->user_journals
);
281 journal_file_close(f
);
284 r
= journal_file_open_reliably(p
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, s
->system_journal
, &f
);
288 return s
->system_journal
;
290 server_fix_perms(s
, f
, uid
);
292 r
= hashmap_put(s
->user_journals
, UINT32_TO_PTR(uid
), f
);
294 journal_file_close(f
);
295 return s
->system_journal
;
301 static void server_rotate(Server
*s
) {
307 log_debug("Rotating...");
309 if (s
->runtime_journal
) {
310 r
= journal_file_rotate(&s
->runtime_journal
, s
->compress
, false);
312 if (s
->runtime_journal
)
313 log_error("Failed to rotate %s: %s", s
->runtime_journal
->path
, strerror(-r
));
315 log_error("Failed to create new runtime journal: %s", strerror(-r
));
317 server_fix_perms(s
, s
->runtime_journal
, 0);
320 if (s
->system_journal
) {
321 r
= journal_file_rotate(&s
->system_journal
, s
->compress
, s
->seal
);
323 if (s
->system_journal
)
324 log_error("Failed to rotate %s: %s", s
->system_journal
->path
, strerror(-r
));
326 log_error("Failed to create new system journal: %s", strerror(-r
));
329 server_fix_perms(s
, s
->system_journal
, 0);
332 HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
333 r
= journal_file_rotate(&f
, s
->compress
, s
->seal
);
336 log_error("Failed to rotate %s: %s", f
->path
, strerror(-r
));
338 log_error("Failed to create user journal: %s", strerror(-r
));
340 hashmap_replace(s
->user_journals
, k
, f
);
341 server_fix_perms(s
, s
->system_journal
, PTR_TO_UINT32(k
));
346 static void server_vacuum(Server
*s
) {
352 log_debug("Vacuuming...");
354 r
= sd_id128_get_machine(&machine
);
356 log_error("Failed to get machine ID: %s", strerror(-r
));
360 sd_id128_to_string(machine
, ids
);
362 if (s
->system_journal
) {
363 p
= strappend("/var/log/journal/", ids
);
369 r
= journal_directory_vacuum(p
, s
->system_metrics
.max_use
, s
->system_metrics
.keep_free
);
370 if (r
< 0 && r
!= -ENOENT
)
371 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
375 if (s
->runtime_journal
) {
376 p
= strappend("/run/log/journal/", ids
);
382 r
= journal_directory_vacuum(p
, s
->runtime_metrics
.max_use
, s
->runtime_metrics
.keep_free
);
383 if (r
< 0 && r
!= -ENOENT
)
384 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
388 s
->cached_available_space_timestamp
= 0;
391 static char *shortened_cgroup_path(pid_t pid
) {
393 char *process_path
, *init_path
, *path
;
397 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, pid
, &process_path
);
401 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, 1, &init_path
);
407 if (endswith(init_path
, "/system"))
408 init_path
[strlen(init_path
) - 7] = 0;
409 else if (streq(init_path
, "/"))
412 if (startswith(process_path
, init_path
)) {
415 p
= strdup(process_path
+ strlen(init_path
));
433 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
) {
435 bool vacuumed
= false;
442 f
= find_journal(s
, uid
);
446 if (journal_file_rotate_suggested(f
)) {
447 log_debug("Journal header limits reached or header out-of-date, rotating.");
452 f
= find_journal(s
, uid
);
458 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
463 (r
!= -E2BIG
&& /* hit limit */
464 r
!= -EFBIG
&& /* hit fs limit */
465 r
!= -EDQUOT
&& /* quota hit */
466 r
!= -ENOSPC
&& /* disk full */
467 r
!= -EBADMSG
&& /* corrupted */
468 r
!= -ENODATA
&& /* truncated */
469 r
!= -EHOSTDOWN
&& /* other machine */
470 r
!= -EPROTONOSUPPORT
&& /* unsupported feature */
471 r
!= -EBUSY
&& /* unclean shutdown */
472 r
!= -ESHUTDOWN
/* already archived */)) {
473 log_error("Failed to write entry, ignoring: %s", strerror(-r
));
477 if (r
== -E2BIG
|| r
== -EFBIG
|| r
== EDQUOT
|| r
== ENOSPC
)
478 log_debug("Allocation limit reached, rotating.");
479 else if (r
== -EHOSTDOWN
)
480 log_info("Journal file from other machine, rotating.");
481 else if (r
== -EBUSY
)
482 log_info("Unclean shutdown, rotating.");
484 log_warning("Journal file corrupted, rotating.");
490 f
= find_journal(s
, uid
);
494 log_debug("Retrying write.");
498 static void dispatch_message_real(
500 struct iovec
*iovec
, unsigned n
, unsigned m
,
503 const char *label
, size_t label_len
,
504 const char *unit_id
) {
506 char *pid
= NULL
, *uid
= NULL
, *gid
= NULL
,
507 *source_time
= NULL
, *boot_id
= NULL
, *machine_id
= NULL
,
508 *comm
= NULL
, *cmdline
= NULL
, *hostname
= NULL
,
509 *audit_session
= NULL
, *audit_loginuid
= NULL
,
510 *exe
= NULL
, *cgroup
= NULL
, *session
= NULL
,
511 *owner_uid
= NULL
, *unit
= NULL
, *selinux_context
= NULL
;
517 uid_t loginuid
= 0, realuid
= 0;
522 assert(n
+ N_IOVEC_META_FIELDS
<= m
);
530 realuid
= ucred
->uid
;
532 if (asprintf(&pid
, "_PID=%lu", (unsigned long) ucred
->pid
) >= 0)
533 IOVEC_SET_STRING(iovec
[n
++], pid
);
535 if (asprintf(&uid
, "_UID=%lu", (unsigned long) ucred
->uid
) >= 0)
536 IOVEC_SET_STRING(iovec
[n
++], uid
);
538 if (asprintf(&gid
, "_GID=%lu", (unsigned long) ucred
->gid
) >= 0)
539 IOVEC_SET_STRING(iovec
[n
++], gid
);
541 r
= get_process_comm(ucred
->pid
, &t
);
543 comm
= strappend("_COMM=", t
);
547 IOVEC_SET_STRING(iovec
[n
++], comm
);
550 r
= get_process_exe(ucred
->pid
, &t
);
552 exe
= strappend("_EXE=", t
);
556 IOVEC_SET_STRING(iovec
[n
++], exe
);
559 r
= get_process_cmdline(ucred
->pid
, LINE_MAX
, false, &t
);
561 cmdline
= strappend("_CMDLINE=", t
);
565 IOVEC_SET_STRING(iovec
[n
++], cmdline
);
568 r
= audit_session_from_pid(ucred
->pid
, &audit
);
570 if (asprintf(&audit_session
, "_AUDIT_SESSION=%lu", (unsigned long) audit
) >= 0)
571 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
573 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
575 if (asprintf(&audit_loginuid
, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid
) >= 0)
576 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
578 t
= shortened_cgroup_path(ucred
->pid
);
580 cgroup
= strappend("_SYSTEMD_CGROUP=", t
);
584 IOVEC_SET_STRING(iovec
[n
++], cgroup
);
588 if (sd_pid_get_session(ucred
->pid
, &t
) >= 0) {
589 session
= strappend("_SYSTEMD_SESSION=", t
);
593 IOVEC_SET_STRING(iovec
[n
++], session
);
596 if (sd_pid_get_owner_uid(ucred
->uid
, &owner
) >= 0)
597 if (asprintf(&owner_uid
, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner
) >= 0)
598 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
601 if (cg_pid_get_unit(ucred
->pid
, &t
) >= 0) {
602 unit
= strappend("_SYSTEMD_UNIT=", t
);
605 unit
= strappend("_SYSTEMD_UNIT=", unit_id
);
608 IOVEC_SET_STRING(iovec
[n
++], unit
);
612 selinux_context
= malloc(sizeof("_SELINUX_CONTEXT=") + label_len
);
613 if (selinux_context
) {
614 memcpy(selinux_context
, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
615 memcpy(selinux_context
+sizeof("_SELINUX_CONTEXT=")-1, label
, label_len
);
616 selinux_context
[sizeof("_SELINUX_CONTEXT=")-1+label_len
] = 0;
617 IOVEC_SET_STRING(iovec
[n
++], selinux_context
);
620 security_context_t con
;
622 if (getpidcon(ucred
->pid
, &con
) >= 0) {
623 selinux_context
= strappend("_SELINUX_CONTEXT=", con
);
625 IOVEC_SET_STRING(iovec
[n
++], selinux_context
);
634 if (asprintf(&source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu",
635 (unsigned long long) timeval_load(tv
)) >= 0)
636 IOVEC_SET_STRING(iovec
[n
++], source_time
);
639 /* Note that strictly speaking storing the boot id here is
640 * redundant since the entry includes this in-line
641 * anyway. However, we need this indexed, too. */
642 r
= sd_id128_get_boot(&id
);
644 if (asprintf(&boot_id
, "_BOOT_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
645 IOVEC_SET_STRING(iovec
[n
++], boot_id
);
647 r
= sd_id128_get_machine(&id
);
649 if (asprintf(&machine_id
, "_MACHINE_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
650 IOVEC_SET_STRING(iovec
[n
++], machine_id
);
652 t
= gethostname_malloc();
654 hostname
= strappend("_HOSTNAME=", t
);
657 IOVEC_SET_STRING(iovec
[n
++], hostname
);
662 write_to_journal(s
, realuid
== 0 ? 0 : loginuid
, iovec
, n
);
675 free(audit_loginuid
);
680 free(selinux_context
);
683 void server_driver_message(Server
*s
, sd_id128_t message_id
, const char *format
, ...) {
684 char mid
[11 + 32 + 1];
685 char buffer
[16 + LINE_MAX
+ 1];
686 struct iovec iovec
[N_IOVEC_META_FIELDS
+ 4];
694 IOVEC_SET_STRING(iovec
[n
++], "PRIORITY=6");
695 IOVEC_SET_STRING(iovec
[n
++], "_TRANSPORT=driver");
697 memcpy(buffer
, "MESSAGE=", 8);
698 va_start(ap
, format
);
699 vsnprintf(buffer
+ 8, sizeof(buffer
) - 8, format
, ap
);
701 char_array_0(buffer
);
702 IOVEC_SET_STRING(iovec
[n
++], buffer
);
704 snprintf(mid
, sizeof(mid
), "MESSAGE_ID=" SD_ID128_FORMAT_STR
, SD_ID128_FORMAT_VAL(message_id
));
706 IOVEC_SET_STRING(iovec
[n
++], mid
);
709 ucred
.pid
= getpid();
710 ucred
.uid
= getuid();
711 ucred
.gid
= getgid();
713 dispatch_message_real(s
, iovec
, n
, ELEMENTSOF(iovec
), &ucred
, NULL
, NULL
, 0, NULL
);
716 void server_dispatch_message(
718 struct iovec
*iovec
, unsigned n
, unsigned m
,
721 const char *label
, size_t label_len
,
726 char *path
= NULL
, *c
;
729 assert(iovec
|| n
== 0);
734 if (LOG_PRI(priority
) > s
->max_level_store
)
740 path
= shortened_cgroup_path(ucred
->pid
);
744 /* example: /user/lennart/3/foobar
745 * /system/dbus.service/foobar
747 * So let's cut off everything past the third /, since that is
748 * where user directories start */
750 c
= strchr(path
, '/');
752 c
= strchr(c
+1, '/');
754 c
= strchr(c
+1, '/');
760 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
& LOG_PRIMASK
, available_space(s
));
767 /* Write a suppression message if we suppressed something */
769 server_driver_message(s
, SD_MESSAGE_JOURNAL_DROPPED
, "Suppressed %u messages from %s", rl
- 1, path
);
774 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
, label
, label_len
, unit_id
);
778 static int system_journal_open(Server
*s
) {
784 r
= sd_id128_get_machine(&machine
);
788 sd_id128_to_string(machine
, ids
);
790 if (!s
->system_journal
&&
791 (s
->storage
== STORAGE_PERSISTENT
|| s
->storage
== STORAGE_AUTO
) &&
792 access("/run/systemd/journal/flushed", F_OK
) >= 0) {
794 /* If in auto mode: first try to create the machine
795 * path, but not the prefix.
797 * If in persistent mode: create /var/log/journal and
798 * the machine path */
800 if (s
->storage
== STORAGE_PERSISTENT
)
801 (void) mkdir("/var/log/journal/", 0755);
803 fn
= strappend("/var/log/journal/", ids
);
807 (void) mkdir(fn
, 0755);
810 fn
= strjoin("/var/log/journal/", ids
, "/system.journal", NULL
);
814 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, s
->seal
, &s
->system_metrics
, s
->mmap
, NULL
, &s
->system_journal
);
818 server_fix_perms(s
, s
->system_journal
, 0);
821 if (r
!= -ENOENT
&& r
!= -EROFS
)
822 log_warning("Failed to open system journal: %s", strerror(-r
));
828 if (!s
->runtime_journal
&&
829 (s
->storage
!= STORAGE_NONE
)) {
831 fn
= strjoin("/run/log/journal/", ids
, "/system.journal", NULL
);
835 if (s
->system_journal
) {
837 /* Try to open the runtime journal, but only
838 * if it already exists, so that we can flush
839 * it into the system journal */
841 r
= journal_file_open(fn
, O_RDWR
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
846 log_warning("Failed to open runtime journal: %s", strerror(-r
));
853 /* OK, we really need the runtime journal, so create
854 * it if necessary. */
856 (void) mkdir_parents(fn
, 0755);
857 r
= journal_file_open_reliably(fn
, O_RDWR
|O_CREAT
, 0640, s
->compress
, false, &s
->runtime_metrics
, s
->mmap
, NULL
, &s
->runtime_journal
);
861 log_error("Failed to open runtime journal: %s", strerror(-r
));
866 if (s
->runtime_journal
)
867 server_fix_perms(s
, s
->runtime_journal
, 0);
873 static int server_flush_to_var(Server
*s
) {
881 if (s
->storage
!= STORAGE_AUTO
&&
882 s
->storage
!= STORAGE_PERSISTENT
)
885 if (!s
->runtime_journal
)
888 system_journal_open(s
);
890 if (!s
->system_journal
)
893 log_debug("Flushing to /var...");
895 r
= sd_id128_get_machine(&machine
);
897 log_error("Failed to get machine id: %s", strerror(-r
));
901 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
903 log_error("Failed to read runtime journal: %s", strerror(-r
));
907 SD_JOURNAL_FOREACH(j
) {
911 assert(f
&& f
->current_offset
> 0);
913 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
915 log_error("Can't read entry: %s", strerror(-r
));
919 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
921 log_debug("Allocation limit reached.");
923 journal_file_post_change(s
->system_journal
);
927 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
931 log_error("Can't write entry: %s", strerror(-r
));
937 journal_file_post_change(s
->system_journal
);
939 journal_file_close(s
->runtime_journal
);
940 s
->runtime_journal
= NULL
;
943 rm_rf("/run/log/journal", false, true, false);
948 static int process_event(Server
*s
, struct epoll_event
*ev
) {
952 if (ev
->data
.fd
== s
->signal_fd
) {
953 struct signalfd_siginfo sfsi
;
956 if (ev
->events
!= EPOLLIN
) {
957 log_error("Got invalid event from epoll.");
961 n
= read(s
->signal_fd
, &sfsi
, sizeof(sfsi
));
962 if (n
!= sizeof(sfsi
)) {
967 if (errno
== EINTR
|| errno
== EAGAIN
)
973 log_info("Received SIG%s", signal_to_string(sfsi
.ssi_signo
));
975 if (sfsi
.ssi_signo
== SIGUSR1
) {
976 touch("/run/systemd/journal/flushed");
977 server_flush_to_var(s
);
981 if (sfsi
.ssi_signo
== SIGUSR2
) {
989 } else if (ev
->data
.fd
== s
->dev_kmsg_fd
) {
992 if (ev
->events
!= EPOLLIN
) {
993 log_error("Got invalid event from epoll.");
997 r
= server_read_dev_kmsg(s
);
1003 } else if (ev
->data
.fd
== s
->native_fd
||
1004 ev
->data
.fd
== s
->syslog_fd
) {
1006 if (ev
->events
!= EPOLLIN
) {
1007 log_error("Got invalid event from epoll.");
1012 struct msghdr msghdr
;
1014 struct ucred
*ucred
= NULL
;
1015 struct timeval
*tv
= NULL
;
1016 struct cmsghdr
*cmsg
;
1018 size_t label_len
= 0;
1020 struct cmsghdr cmsghdr
;
1022 /* We use NAME_MAX space for the
1023 * SELinux label here. The kernel
1024 * currently enforces no limit, but
1025 * according to suggestions from the
1026 * SELinux people this will change and
1027 * it will probably be identical to
1028 * NAME_MAX. For now we use that, but
1029 * this should be updated one day when
1030 * the final limit is known.*/
1031 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1032 CMSG_SPACE(sizeof(struct timeval
)) +
1033 CMSG_SPACE(sizeof(int)) + /* fd */
1034 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1041 if (ioctl(ev
->data
.fd
, SIOCINQ
, &v
) < 0) {
1042 log_error("SIOCINQ failed: %m");
1046 if (s
->buffer_size
< (size_t) v
) {
1050 l
= MAX(LINE_MAX
+ (size_t) v
, s
->buffer_size
* 2);
1051 b
= realloc(s
->buffer
, l
+1);
1054 log_error("Couldn't increase buffer.");
1063 iovec
.iov_base
= s
->buffer
;
1064 iovec
.iov_len
= s
->buffer_size
;
1068 msghdr
.msg_iov
= &iovec
;
1069 msghdr
.msg_iovlen
= 1;
1070 msghdr
.msg_control
= &control
;
1071 msghdr
.msg_controllen
= sizeof(control
);
1073 n
= recvmsg(ev
->data
.fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1076 if (errno
== EINTR
|| errno
== EAGAIN
)
1079 log_error("recvmsg() failed: %m");
1083 for (cmsg
= CMSG_FIRSTHDR(&msghdr
); cmsg
; cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
)) {
1085 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1086 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1087 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1088 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1089 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1090 cmsg
->cmsg_type
== SCM_SECURITY
) {
1091 label
= (char*) CMSG_DATA(cmsg
);
1092 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1093 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1094 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1095 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1096 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1097 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1098 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1099 fds
= (int*) CMSG_DATA(cmsg
);
1100 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1104 if (ev
->data
.fd
== s
->syslog_fd
) {
1107 if (n
> 0 && n_fds
== 0) {
1108 e
= memchr(s
->buffer
, '\n', n
);
1114 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1115 } else if (n_fds
> 0)
1116 log_warning("Got file descriptors via syslog socket. Ignoring.");
1119 if (n
> 0 && n_fds
== 0)
1120 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1121 else if (n
== 0 && n_fds
== 1)
1122 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1124 log_warning("Got too many file descriptors via native socket. Ignoring.");
1127 close_many(fds
, n_fds
);
1132 } else if (ev
->data
.fd
== s
->stdout_fd
) {
1134 if (ev
->events
!= EPOLLIN
) {
1135 log_error("Got invalid event from epoll.");
1139 stdout_stream_new(s
);
1143 StdoutStream
*stream
;
1145 if ((ev
->events
|EPOLLIN
|EPOLLHUP
) != (EPOLLIN
|EPOLLHUP
)) {
1146 log_error("Got invalid event from epoll.");
1150 /* If it is none of the well-known fds, it must be an
1151 * stdout stream fd. Note that this is a bit ugly here
1152 * (since we rely on none of the well-known fds
1153 * being interpretable as a pointer), but nonetheless
1154 * safe, since the well-known fds would never get an
1155 * fd > 4096, i.e. beyond the first memory page */
1157 stream
= ev
->data
.ptr
;
1159 if (stdout_stream_process(stream
) <= 0)
1160 stdout_stream_free(stream
);
1165 log_error("Unknown event.");
1169 static int open_signalfd(Server
*s
) {
1171 struct epoll_event ev
;
1175 assert_se(sigemptyset(&mask
) == 0);
1176 sigset_add_many(&mask
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, -1);
1177 assert_se(sigprocmask(SIG_SETMASK
, &mask
, NULL
) == 0);
1179 s
->signal_fd
= signalfd(-1, &mask
, SFD_NONBLOCK
|SFD_CLOEXEC
);
1180 if (s
->signal_fd
< 0) {
1181 log_error("signalfd(): %m");
1186 ev
.events
= EPOLLIN
;
1187 ev
.data
.fd
= s
->signal_fd
;
1189 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->signal_fd
, &ev
) < 0) {
1190 log_error("epoll_ctl(): %m");
1197 static int server_parse_proc_cmdline(Server
*s
) {
1198 char *line
, *w
, *state
;
1202 if (detect_container(NULL
) > 0)
1205 r
= read_one_line_file("/proc/cmdline", &line
);
1207 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r
));
1211 FOREACH_WORD_QUOTED(w
, l
, line
, state
) {
1214 word
= strndup(w
, l
);
1220 if (startswith(word
, "systemd.journald.forward_to_syslog=")) {
1221 r
= parse_boolean(word
+ 35);
1223 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word
+ 35);
1225 s
->forward_to_syslog
= r
;
1226 } else if (startswith(word
, "systemd.journald.forward_to_kmsg=")) {
1227 r
= parse_boolean(word
+ 33);
1229 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word
+ 33);
1231 s
->forward_to_kmsg
= r
;
1232 } else if (startswith(word
, "systemd.journald.forward_to_console=")) {
1233 r
= parse_boolean(word
+ 36);
1235 log_warning("Failed to parse forward to console switch %s. Ignoring.", word
+ 36);
1237 s
->forward_to_console
= r
;
1238 } else if (startswith(word
, "systemd.journald"))
1239 log_warning("Invalid systemd.journald parameter. Ignoring.");
1251 static int server_parse_config_file(Server
*s
) {
1258 fn
= "/etc/systemd/journald.conf";
1259 f
= fopen(fn
, "re");
1261 if (errno
== ENOENT
)
1264 log_warning("Failed to open configuration file %s: %m", fn
);
1268 r
= config_parse(fn
, f
, "Journal\0", config_item_perf_lookup
, (void*) journald_gperf_lookup
, false, s
);
1270 log_warning("Failed to parse configuration file: %s", strerror(-r
));
1277 static int server_init(Server
*s
) {
1283 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->signal_fd
= s
->epoll_fd
= s
->dev_kmsg_fd
= -1;
1287 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1288 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1290 s
->forward_to_syslog
= true;
1292 s
->max_level_store
= LOG_DEBUG
;
1293 s
->max_level_syslog
= LOG_DEBUG
;
1294 s
->max_level_kmsg
= LOG_NOTICE
;
1295 s
->max_level_console
= LOG_INFO
;
1297 memset(&s
->system_metrics
, 0xFF, sizeof(s
->system_metrics
));
1298 memset(&s
->runtime_metrics
, 0xFF, sizeof(s
->runtime_metrics
));
1300 server_parse_config_file(s
);
1301 server_parse_proc_cmdline(s
);
1303 mkdir_p("/run/systemd/journal", 0755);
1305 s
->user_journals
= hashmap_new(trivial_hash_func
, trivial_compare_func
);
1306 if (!s
->user_journals
)
1309 s
->mmap
= mmap_cache_new();
1313 s
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
1314 if (s
->epoll_fd
< 0) {
1315 log_error("Failed to create epoll object: %m");
1319 n
= sd_listen_fds(true);
1321 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n
));
1325 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1327 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1329 if (s
->native_fd
>= 0) {
1330 log_error("Too many native sockets passed.");
1336 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1338 if (s
->stdout_fd
>= 0) {
1339 log_error("Too many stdout sockets passed.");
1345 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0) {
1347 if (s
->syslog_fd
>= 0) {
1348 log_error("Too many /dev/log sockets passed.");
1355 log_error("Unknown socket passed.");
1360 r
= server_open_syslog_socket(s
);
1364 r
= server_open_native_socket(s
);
1368 r
= server_open_stdout_socket(s
);
1372 r
= server_open_dev_kmsg(s
);
1376 r
= server_open_kernel_seqnum(s
);
1380 r
= open_signalfd(s
);
1384 s
->udev
= udev_new();
1388 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1392 r
= system_journal_open(s
);
1399 static void server_maybe_append_tags(Server
*s
) {
1405 n
= now(CLOCK_REALTIME
);
1407 if (s
->system_journal
)
1408 journal_file_maybe_append_tag(s
->system_journal
, n
);
1410 HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1411 journal_file_maybe_append_tag(f
, n
);
1415 static void server_done(Server
*s
) {
1419 while (s
->stdout_streams
)
1420 stdout_stream_free(s
->stdout_streams
);
1422 if (s
->system_journal
)
1423 journal_file_close(s
->system_journal
);
1425 if (s
->runtime_journal
)
1426 journal_file_close(s
->runtime_journal
);
1428 while ((f
= hashmap_steal_first(s
->user_journals
)))
1429 journal_file_close(f
);
1431 hashmap_free(s
->user_journals
);
1433 if (s
->epoll_fd
>= 0)
1434 close_nointr_nofail(s
->epoll_fd
);
1436 if (s
->signal_fd
>= 0)
1437 close_nointr_nofail(s
->signal_fd
);
1439 if (s
->syslog_fd
>= 0)
1440 close_nointr_nofail(s
->syslog_fd
);
1442 if (s
->native_fd
>= 0)
1443 close_nointr_nofail(s
->native_fd
);
1445 if (s
->stdout_fd
>= 0)
1446 close_nointr_nofail(s
->stdout_fd
);
1448 if (s
->dev_kmsg_fd
>= 0)
1449 close_nointr_nofail(s
->dev_kmsg_fd
);
1452 journal_rate_limit_free(s
->rate_limit
);
1454 if (s
->kernel_seqnum
)
1455 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1461 mmap_cache_unref(s
->mmap
);
1464 udev_unref(s
->udev
);
1467 int main(int argc
, char *argv
[]) {
1471 /* if (getppid() != 1) { */
1472 /* log_error("This program should be invoked by init only."); */
1473 /* return EXIT_FAILURE; */
1477 log_error("This program does not take arguments.");
1478 return EXIT_FAILURE
;
1481 log_set_target(LOG_TARGET_SAFE
);
1482 log_set_facility(LOG_SYSLOG
);
1483 log_set_max_level(LOG_DEBUG
);
1484 log_parse_environment();
1489 r
= server_init(&server
);
1493 server_vacuum(&server
);
1494 server_flush_to_var(&server
);
1495 server_flush_dev_kmsg(&server
);
1497 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1498 server_driver_message(&server
, SD_MESSAGE_JOURNAL_START
, "Journal started");
1502 "STATUS=Processing requests...");
1505 struct epoll_event event
;
1511 if (server
.system_journal
&&
1512 journal_file_next_evolve_usec(server
.system_journal
, &u
)) {
1515 n
= now(CLOCK_REALTIME
);
1520 t
= (int) ((u
- n
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
);
1525 r
= epoll_wait(server
.epoll_fd
, &event
, 1, t
);
1531 log_error("epoll_wait() failed: %m");
1537 r
= process_event(&server
, &event
);
1544 server_maybe_append_tags(&server
);
1547 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1548 server_driver_message(&server
, SD_MESSAGE_JOURNAL_STOP
, "Journal stopped");
1552 "STATUS=Shutting down...");
1554 server_done(&server
);
1556 return r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;