1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <acl/libacl.h>
31 #include <sys/ioctl.h>
32 #include <linux/sockios.h>
33 #include <sys/statvfs.h>
36 #include "journal-file.h"
37 #include "sd-daemon.h"
38 #include "socket-util.h"
40 #include "cgroup-util.h"
42 #include "journal-rate-limit.h"
43 #include "sd-journal.h"
44 #include "journal-internal.h"
/* Upper bound on simultaneously open per-user journal files; find_journal()
 * closes entries from the user_journals hashmap while its size is at or
 * above this value before opening another one. */
46 #define USER_JOURNALS_MAX 1024
/* Upper bound on concurrently accepted stdout stream connections;
 * stdout_stream_new() refuses the connection once n_stdout_streams
 * reaches this value. */
47 #define STDOUT_STREAMS_MAX 4096
/* Interval and burst values handed to journal_rate_limit_new() when the
 * server is initialized. */
49 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
50 #define DEFAULT_RATE_LIMIT_BURST 200
/* available_space() returns its cached result as long as the cache
 * timestamp plus this interval is still in the future. */
52 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
/* server_flush_to_var() compares var_available_timestamp plus this interval
 * against the current monotonic time before re-attempting to open the
 * system journal. */
54 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)
/* Send timeout (SO_SNDTIMEO) set on the syslog socket, which is also used
 * for forwarding messages to another syslog implementation. */
56 #define SYSLOG_TIMEOUT_USEC (5*USEC_PER_SEC)
58 typedef struct StdoutStream StdoutStream
;
60 typedef struct Server
{
67 JournalFile
*runtime_journal
;
68 JournalFile
*system_journal
;
69 Hashmap
*user_journals
;
76 JournalRateLimit
*rate_limit
;
78 JournalMetrics metrics
;
81 uint64_t cached_available_space
;
82 usec_t cached_available_space_timestamp
;
84 uint64_t var_available_timestamp
;
86 LIST_HEAD(StdoutStream
, stdout_streams
);
87 unsigned n_stdout_streams
;
90 typedef enum StdoutStreamState
{
92 STDOUT_STREAM_PRIORITY
,
93 STDOUT_STREAM_PRIORITY_PREFIX
,
94 STDOUT_STREAM_TEE_CONSOLE
,
100 StdoutStreamState state
;
108 bool priority_prefix
:1;
111 char buffer
[LINE_MAX
+1];
114 LIST_FIELDS(StdoutStream
, stdout_stream
);
117 static int server_flush_to_var(Server
*s
);
119 static uint64_t available_space(Server
*s
) {
125 uint64_t sum
= 0, avail
= 0, ss_avail
= 0;
128 usec_t ts
= now(CLOCK_MONOTONIC
);
130 if (s
->cached_available_space_timestamp
+ RECHECK_AVAILABLE_SPACE_USEC
> ts
)
131 return s
->cached_available_space
;
133 r
= sd_id128_get_machine(&machine
);
137 if (s
->system_journal
)
138 f
= "/var/log/journal/";
140 f
= "/run/log/journal/";
142 p
= strappend(f
, sd_id128_to_string(machine
, ids
));
152 if (fstatvfs(dirfd(d
), &ss
) < 0)
157 struct dirent buf
, *de
;
160 k
= readdir_r(d
, &buf
, &de
);
169 if (!dirent_is_file_with_suffix(de
, ".journal"))
172 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
175 sum
+= (uint64_t) st
.st_blocks
* (uint64_t) st
.st_blksize
;
178 avail
= sum
>= s
->metrics
.max_use
? 0 : s
->metrics
.max_use
- sum
;
180 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
182 ss_avail
= ss_avail
< s
->metrics
.keep_free
? 0 : ss_avail
- s
->metrics
.keep_free
;
184 if (ss_avail
< avail
)
187 s
->cached_available_space
= avail
;
188 s
->cached_available_space_timestamp
= ts
;
196 static void fix_perms(JournalFile
*f
, uid_t uid
) {
199 acl_permset_t permset
;
204 r
= fchmod_and_fchown(f
->fd
, 0640, 0, 0);
206 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f
->path
, strerror(-r
));
211 acl
= acl_get_fd(f
->fd
);
213 log_warning("Failed to read ACL on %s, ignoring: %m", f
->path
);
217 r
= acl_find_uid(acl
, uid
, &entry
);
220 if (acl_create_entry(&acl
, &entry
) < 0 ||
221 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
222 acl_set_qualifier(entry
, &uid
) < 0) {
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
228 if (acl_get_permset(entry
, &permset
) < 0 ||
229 acl_add_perm(permset
, ACL_READ
) < 0 ||
230 acl_calc_mask(&acl
) < 0) {
231 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
235 if (acl_set_fd(f
->fd
, acl
) < 0)
236 log_warning("Failed to set ACL on %s, ignoring: %m", f
->path
);
242 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
251 /* We split up user logs only on /var, not on /run. If the
252 * runtime file is open, we write to it exclusively, in order
253 * to guarantee proper order as soon as we flush /run to
254 * /var and close the runtime file. */
256 if (s
->runtime_journal
)
257 return s
->runtime_journal
;
260 return s
->system_journal
;
262 r
= sd_id128_get_machine(&machine
);
264 return s
->system_journal
;
266 f
= hashmap_get(s
->user_journals
, UINT32_TO_PTR(uid
));
270 if (asprintf(&p
, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine
, ids
), (unsigned long) uid
) < 0)
271 return s
->system_journal
;
273 while (hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
274 /* Too many open? Then let's close one */
275 f
= hashmap_steal_first(s
->user_journals
);
277 journal_file_close(f
);
280 r
= journal_file_open(p
, O_RDWR
|O_CREAT
, 0640, s
->system_journal
, &f
);
284 return s
->system_journal
;
287 f
->metrics
= s
->metrics
;
288 f
->compress
= s
->compress
;
290 r
= hashmap_put(s
->user_journals
, UINT32_TO_PTR(uid
), f
);
292 journal_file_close(f
);
293 return s
->system_journal
;
299 static void server_vacuum(Server
*s
) {
308 log_info("Rotating...");
310 if (s
->runtime_journal
) {
311 r
= journal_file_rotate(&s
->runtime_journal
);
313 log_error("Failed to rotate %s: %s", s
->runtime_journal
->path
, strerror(-r
));
316 if (s
->system_journal
) {
317 r
= journal_file_rotate(&s
->system_journal
);
319 log_error("Failed to rotate %s: %s", s
->system_journal
->path
, strerror(-r
));
322 HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
323 r
= journal_file_rotate(&f
);
325 log_error("Failed to rotate %s: %s", f
->path
, strerror(-r
));
327 hashmap_replace(s
->user_journals
, k
, f
);
330 log_info("Vacuuming...");
332 r
= sd_id128_get_machine(&machine
);
334 log_error("Failed to get machine ID: %s", strerror(-r
));
338 if (asprintf(&p
, "/var/log/journal/%s", sd_id128_to_string(machine
, ids
)) < 0) {
339 log_error("Out of memory.");
343 r
= journal_directory_vacuum(p
, s
->metrics
.max_use
, s
->metrics
.keep_free
);
344 if (r
< 0 && r
!= -ENOENT
)
345 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
348 if (asprintf(&p
, "/run/log/journal/%s", ids
) < 0) {
349 log_error("Out of memory.");
353 r
= journal_directory_vacuum(p
, s
->metrics
.max_use
, s
->metrics
.keep_free
);
354 if (r
< 0 && r
!= -ENOENT
)
355 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
358 s
->cached_available_space_timestamp
= 0;
361 static char *shortened_cgroup_path(pid_t pid
) {
363 char *process_path
, *init_path
, *path
;
367 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, pid
, &process_path
);
371 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, 1, &init_path
);
377 if (streq(init_path
, "/"))
380 if (startswith(process_path
, init_path
)) {
383 p
= strdup(process_path
+ strlen(init_path
));
401 static void dispatch_message_real(Server
*s
,
402 struct iovec
*iovec
, unsigned n
, unsigned m
,
404 struct timeval
*tv
) {
406 char *pid
= NULL
, *uid
= NULL
, *gid
= NULL
,
407 *source_time
= NULL
, *boot_id
= NULL
, *machine_id
= NULL
,
408 *comm
= NULL
, *cmdline
= NULL
, *hostname
= NULL
,
409 *audit_session
= NULL
, *audit_loginuid
= NULL
,
410 *exe
= NULL
, *cgroup
= NULL
;
416 uid_t loginuid
= 0, realuid
= 0;
418 bool vacuumed
= false;
429 realuid
= ucred
->uid
;
431 if (asprintf(&pid
, "_PID=%lu", (unsigned long) ucred
->pid
) >= 0)
432 IOVEC_SET_STRING(iovec
[n
++], pid
);
434 if (asprintf(&uid
, "_UID=%lu", (unsigned long) ucred
->uid
) >= 0)
435 IOVEC_SET_STRING(iovec
[n
++], uid
);
437 if (asprintf(&gid
, "_GID=%lu", (unsigned long) ucred
->gid
) >= 0)
438 IOVEC_SET_STRING(iovec
[n
++], gid
);
440 r
= get_process_comm(ucred
->pid
, &t
);
442 comm
= strappend("_COMM=", t
);
444 IOVEC_SET_STRING(iovec
[n
++], comm
);
448 r
= get_process_exe(ucred
->pid
, &t
);
450 exe
= strappend("_EXE=", t
);
452 IOVEC_SET_STRING(iovec
[n
++], exe
);
456 r
= get_process_cmdline(ucred
->pid
, LINE_MAX
, false, &t
);
458 cmdline
= strappend("_CMDLINE=", t
);
460 IOVEC_SET_STRING(iovec
[n
++], cmdline
);
464 r
= audit_session_from_pid(ucred
->pid
, &session
);
466 if (asprintf(&audit_session
, "_AUDIT_SESSION=%lu", (unsigned long) session
) >= 0)
467 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
469 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
471 if (asprintf(&audit_loginuid
, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid
) >= 0)
472 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
474 path
= shortened_cgroup_path(ucred
->pid
);
476 cgroup
= strappend("_SYSTEMD_CGROUP=", path
);
478 IOVEC_SET_STRING(iovec
[n
++], cgroup
);
485 if (asprintf(&source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu",
486 (unsigned long long) timeval_load(tv
)) >= 0)
487 IOVEC_SET_STRING(iovec
[n
++], source_time
);
490 /* Note that strictly speaking storing the boot id here is
491 * redundant since the entry includes this in-line
492 * anyway. However, we need this indexed, too. */
493 r
= sd_id128_get_boot(&id
);
495 if (asprintf(&boot_id
, "_BOOT_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
496 IOVEC_SET_STRING(iovec
[n
++], boot_id
);
498 r
= sd_id128_get_machine(&id
);
500 if (asprintf(&machine_id
, "_MACHINE_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
501 IOVEC_SET_STRING(iovec
[n
++], machine_id
);
503 t
= gethostname_malloc();
505 hostname
= strappend("_HOSTNAME=", t
);
507 IOVEC_SET_STRING(iovec
[n
++], hostname
);
513 server_flush_to_var(s
);
516 f
= find_journal(s
, realuid
== 0 ? 0 : loginuid
);
518 log_warning("Dropping message, as we can't find a place to store the data.");
520 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
522 if (r
== -E2BIG
&& !vacuumed
) {
523 log_info("Allocation limit reached.");
528 log_info("Retrying write.");
533 log_error("Failed to write entry, ignoring: %s", strerror(-r
));
547 free(audit_loginuid
);
551 static void dispatch_message(Server
*s
,
552 struct iovec
*iovec
, unsigned n
, unsigned m
,
557 char *path
= NULL
, *c
;
560 assert(iovec
|| n
== 0);
568 path
= shortened_cgroup_path(ucred
->pid
);
572 /* example: /user/lennart/3/foobar
573 * /system/dbus.service/foobar
575 * So let's cut off everything past the third /, since that is
576 * where user directories start */
578 c
= strchr(path
, '/');
580 c
= strchr(c
+1, '/');
582 c
= strchr(c
+1, '/');
588 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
, available_space(s
));
597 char suppress_message
[LINE_MAX
];
598 struct iovec suppress_iovec
[15];
600 /* Write a suppression message if we suppressed something */
602 snprintf(suppress_message
, sizeof(suppress_message
), "MESSAGE=Suppressed %u messages from %s", rl
- 1, path
);
603 char_array_0(suppress_message
);
605 IOVEC_SET_STRING(suppress_iovec
[j
++], "PRIORITY=5");
606 IOVEC_SET_STRING(suppress_iovec
[j
++], suppress_message
);
608 dispatch_message_real(s
, suppress_iovec
, j
, ELEMENTSOF(suppress_iovec
), NULL
, NULL
);
614 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
);
617 static void process_syslog_message(Server
*s
, const char *buf
, struct ucred
*ucred
, struct timeval
*tv
) {
618 char *message
= NULL
, *syslog_priority
= NULL
, *syslog_facility
= NULL
;
619 struct iovec iovec
[16];
621 int priority
= LOG_USER
| LOG_INFO
;
626 parse_syslog_priority((char**) &buf
, &priority
);
627 skip_syslog_date((char**) &buf
);
629 if (asprintf(&syslog_priority
, "PRIORITY=%i", priority
& LOG_PRIMASK
) >= 0)
630 IOVEC_SET_STRING(iovec
[n
++], syslog_priority
);
632 if (asprintf(&syslog_facility
, "SYSLOG_FACILITY=%i", LOG_FAC(priority
)) >= 0)
633 IOVEC_SET_STRING(iovec
[n
++], syslog_facility
);
635 message
= strappend("MESSAGE=", buf
);
637 IOVEC_SET_STRING(iovec
[n
++], message
);
639 dispatch_message(s
, iovec
, n
, ELEMENTSOF(iovec
), ucred
, tv
, priority
& LOG_PRIMASK
);
642 free(syslog_facility
);
643 free(syslog_priority
);
646 static bool valid_user_field(const char *p
, size_t l
) {
649 /* We kinda enforce POSIX syntax recommendations for
650 environment variables here, but make a couple of additional
653 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
655 /* No empty field names */
659 /* Don't allow names longer than 64 chars */
663 /* Variables starting with an underscore are protected */
667 /* Don't allow digits as first character */
668 if (p
[0] >= '0' && p
[0] <= '9')
671 /* Only allow A-Z0-9 and '_' */
672 for (a
= p
; a
< p
+ l
; a
++)
673 if (!((*a
>= 'A' && *a
<= 'Z') ||
674 (*a
>= '0' && *a
<= '9') ||
681 static void process_native_message(Server
*s
, const void *buffer
, size_t buffer_size
, struct ucred
*ucred
, struct timeval
*tv
) {
682 struct iovec
*iovec
= NULL
;
683 unsigned n
= 0, m
= 0, j
;
686 int priority
= LOG_INFO
;
689 assert(buffer
|| n
== 0);
692 remaining
= buffer_size
;
694 while (remaining
> 0) {
697 e
= memchr(p
, '\n', remaining
);
700 /* Trailing noise, let's ignore it, and flush what we collected */
701 log_debug("Received message with trailing noise, ignoring.");
706 /* Entry separator */
707 dispatch_message(s
, iovec
, n
, m
, ucred
, tv
, priority
);
716 if (*p
== '.' || *p
== '#') {
717 /* Ignore control commands for now, and
719 remaining
-= (e
- p
) + 1;
724 /* A property follows */
730 u
= MAX((n
+13U) * 2U, 4U);
731 c
= realloc(iovec
, u
* sizeof(struct iovec
));
733 log_error("Out of memory");
741 q
= memchr(p
, '=', e
- p
);
743 if (valid_user_field(p
, q
- p
)) {
744 /* If the field name starts with an
745 * underscore, skip the variable,
746 * since that indicates a trusted
748 iovec
[n
].iov_base
= (char*) p
;
749 iovec
[n
].iov_len
= e
- p
;
752 /* We need to determine the priority
753 * of this entry for the rate limiting
756 memcmp(p
, "PRIORITY=", 10) == 0 &&
759 priority
= p
[10] - '0';
762 remaining
-= (e
- p
) + 1;
769 if (remaining
< e
- p
+ 1 + sizeof(uint64_t) + 1) {
770 log_debug("Failed to parse message, ignoring.");
774 memcpy(&l
, e
+ 1, sizeof(uint64_t));
777 if (remaining
< e
- p
+ 1 + sizeof(uint64_t) + l
+ 1 ||
778 e
[1+sizeof(uint64_t)+l
] != '\n') {
779 log_debug("Failed to parse message, ignoring.");
783 k
= malloc((e
- p
) + 1 + l
);
785 log_error("Out of memory");
791 memcpy(k
+ (e
- p
) + 1, e
+ 1 + sizeof(uint64_t), l
);
793 if (valid_user_field(p
, e
- p
)) {
794 iovec
[n
].iov_base
= k
;
795 iovec
[n
].iov_len
= (e
- p
) + 1 + l
;
800 remaining
-= (e
- p
) + 1 + sizeof(uint64_t) + l
+ 1;
801 p
= e
+ 1 + sizeof(uint64_t) + l
+ 1;
805 dispatch_message(s
, iovec
, n
, m
, ucred
, tv
, priority
);
807 for (j
= 0; j
< n
; j
++)
808 if (iovec
[j
].iov_base
< buffer
||
809 (const uint8_t*) iovec
[j
].iov_base
>= (const uint8_t*) buffer
+ buffer_size
)
810 free(iovec
[j
].iov_base
);
813 static int stdout_stream_log(StdoutStream
*s
, const char *p
, size_t l
) {
814 struct iovec iovec
[15];
815 char *message
= NULL
, *syslog_priority
= NULL
;
823 priority
= s
->priority
;
825 if (s
->priority_prefix
&&
828 p
[1] >= '0' && p
[1] <= '7' &&
831 priority
= p
[1] - '0';
839 if (asprintf(&syslog_priority
, "PRIORITY=%i", priority
) >= 0)
840 IOVEC_SET_STRING(iovec
[n
++], syslog_priority
);
842 tag_len
= s
->tag
? strlen(s
->tag
) + 2: 0;
843 message
= malloc(8 + tag_len
+ l
);
845 memcpy(message
, "MESSAGE=", 8);
848 memcpy(message
+8, s
->tag
, tag_len
-2);
849 memcpy(message
+8+tag_len
-2, ": ", 2);
852 memcpy(message
+8+tag_len
, p
, l
);
853 iovec
[n
].iov_base
= message
;
854 iovec
[n
].iov_len
= 8+tag_len
+l
;
858 dispatch_message(s
->server
, iovec
, n
, ELEMENTSOF(iovec
), &s
->ucred
, NULL
, priority
);
860 if (s
->tee_console
) {
863 console
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
867 IOVEC_SET_STRING(iovec
[n
++], s
->tag
);
868 IOVEC_SET_STRING(iovec
[n
++], ": ");
871 iovec
[n
].iov_base
= (void*) p
;
872 iovec
[n
].iov_len
= l
;
875 IOVEC_SET_STRING(iovec
[n
++], (char*) "\n");
877 writev(console
, iovec
, n
);
882 free(syslog_priority
);
887 static int stdout_stream_line(StdoutStream
*s
, const char *p
, size_t l
) {
891 while (l
> 0 && strchr(WHITESPACE
, *p
)) {
896 while (l
> 0 && strchr(WHITESPACE
, *(p
+l
-1)))
901 case STDOUT_STREAM_TAG
:
904 s
->tag
= strndup(p
, l
);
906 log_error("Out of memory");
911 s
->state
= STDOUT_STREAM_PRIORITY
;
914 case STDOUT_STREAM_PRIORITY
:
915 if (l
!= 1 || *p
< '0' || *p
> '7') {
916 log_warning("Failed to parse log priority line.");
920 s
->priority
= *p
- '0';
921 s
->state
= STDOUT_STREAM_PRIORITY_PREFIX
;
924 case STDOUT_STREAM_PRIORITY_PREFIX
:
925 if (l
!= 1 || *p
< '0' || *p
> '1') {
926 log_warning("Failed to parse priority prefix line.");
930 s
->priority_prefix
= *p
- '0';
931 s
->state
= STDOUT_STREAM_TEE_CONSOLE
;
934 case STDOUT_STREAM_TEE_CONSOLE
:
935 if (l
!= 1 || *p
< '0' || *p
> '1') {
936 log_warning("Failed to parse tee to console line.");
940 s
->tee_console
= *p
- '0';
941 s
->state
= STDOUT_STREAM_RUNNING
;
944 case STDOUT_STREAM_RUNNING
:
945 return stdout_stream_log(s
, p
, l
);
948 assert_not_reached("Unknown stream state");
951 static int stdout_stream_scan(StdoutStream
*s
, bool force_flush
) {
959 remaining
= s
->length
;
964 end
= memchr(p
, '\n', remaining
);
966 if (remaining
>= LINE_MAX
) {
974 r
= stdout_stream_line(s
, p
, end
- p
);
982 if (force_flush
&& remaining
> 0) {
983 r
= stdout_stream_line(s
, p
, remaining
);
992 memmove(s
->buffer
, p
, remaining
);
993 s
->length
= remaining
;
999 static int stdout_stream_process(StdoutStream
*s
) {
1005 l
= read(s
->fd
, s
->buffer
+s
->length
, sizeof(s
->buffer
)-1-s
->length
);
1008 if (errno
== EAGAIN
)
1011 log_warning("Failed to read from stream: %m");
1016 r
= stdout_stream_scan(s
, true);
1024 r
= stdout_stream_scan(s
, false);
1032 static void stdout_stream_free(StdoutStream
*s
) {
1036 assert(s
->server
->n_stdout_streams
> 0);
1037 s
->server
->n_stdout_streams
--;
1038 LIST_REMOVE(StdoutStream
, stdout_stream
, s
->server
->stdout_streams
, s
);
1043 epoll_ctl(s
->server
->epoll_fd
, EPOLL_CTL_DEL
, s
->fd
, NULL
);
1045 close_nointr_nofail(s
->fd
);
1052 static int stdout_stream_new(Server
*s
) {
1053 StdoutStream
*stream
;
1056 struct epoll_event ev
;
1060 fd
= accept4(s
->stdout_fd
, NULL
, NULL
, SOCK_NONBLOCK
|SOCK_CLOEXEC
);
1062 if (errno
== EAGAIN
)
1065 log_error("Failed to accept stdout connection: %m");
1069 if (s
->n_stdout_streams
>= STDOUT_STREAMS_MAX
) {
1070 log_warning("Too many stdout streams, refusing connection.");
1071 close_nointr_nofail(fd
);
1075 stream
= new0(StdoutStream
, 1);
1077 log_error("Out of memory.");
1078 close_nointr_nofail(fd
);
1084 len
= sizeof(stream
->ucred
);
1085 if (getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &stream
->ucred
, &len
) < 0) {
1086 log_error("Failed to determine peer credentials: %m");
1091 if (shutdown(fd
, SHUT_WR
) < 0) {
1092 log_error("Failed to shutdown writing side of socket: %m");
1098 ev
.data
.ptr
= stream
;
1099 ev
.events
= EPOLLIN
;
1100 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
1101 log_error("Failed to add stream to event loop: %m");
1107 LIST_PREPEND(StdoutStream
, stdout_stream
, s
->stdout_streams
, stream
);
1108 s
->n_stdout_streams
++;
1113 stdout_stream_free(stream
);
1117 static int system_journal_open(Server
*s
) {
1123 r
= sd_id128_get_machine(&machine
);
1127 sd_id128_to_string(machine
, ids
);
1129 if (!s
->system_journal
) {
1131 /* First try to create the machine path, but not the prefix */
1132 fn
= strappend("/var/log/journal/", ids
);
1135 (void) mkdir(fn
, 0755);
1138 /* Then create the system journal file */
1139 fn
= join("/var/log/journal/", ids
, "/system.journal", NULL
);
1143 r
= journal_file_open(fn
, O_RDWR
|O_CREAT
, 0640, NULL
, &s
->system_journal
);
1147 s
->system_journal
->metrics
= s
->metrics
;
1148 s
->system_journal
->compress
= s
->compress
;
1150 fix_perms(s
->system_journal
, 0);
1156 log_error("Failed to open system journal: %s", strerror(-r
));
1162 if (!s
->runtime_journal
) {
1164 fn
= join("/run/log/journal/", ids
, "/system.journal", NULL
);
1168 if (s
->system_journal
) {
1170 /* Try to open the runtime journal, but only
1171 * if it already exists, so that we can flush
1172 * it into the system journal */
1174 r
= journal_file_open(fn
, O_RDWR
, 0640, NULL
, &s
->runtime_journal
);
1182 log_error("Failed to open runtime journal: %s", strerror(-r
));
1189 /* OK, we really need the runtime journal, so create
1190 * it if necessary. */
1192 (void) mkdir_parents(fn
, 0755);
1193 r
= journal_file_open(fn
, O_RDWR
|O_CREAT
, 0640, NULL
, &s
->runtime_journal
);
1197 log_error("Failed to open runtime journal: %s", strerror(-r
));
1202 if (s
->runtime_journal
) {
1203 s
->runtime_journal
->metrics
= s
->metrics
;
1204 s
->runtime_journal
->compress
= s
->compress
;
1206 fix_perms(s
->runtime_journal
, 0);
1213 static int server_flush_to_var(Server
*s
) {
1214 char path
[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1223 if (!s
->runtime_journal
)
1226 ts
= now(CLOCK_MONOTONIC
);
1227 if (s
->var_available_timestamp
+ RECHECK_VAR_AVAILABLE_USEC
> ts
)
1230 s
->var_available_timestamp
= ts
;
1232 system_journal_open(s
);
1234 if (!s
->system_journal
)
1237 r
= sd_id128_get_machine(&machine
);
1239 log_error("Failed to get machine id: %s", strerror(-r
));
1243 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1245 log_error("Failed to read runtime journal: %s", strerror(-r
));
1249 SD_JOURNAL_FOREACH(j
) {
1252 f
= j
->current_file
;
1253 assert(f
&& f
->current_offset
> 0);
1255 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1257 log_error("Can't read entry: %s", strerror(-r
));
1261 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1263 log_info("Allocation limit reached.");
1265 journal_file_post_change(s
->system_journal
);
1268 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1272 log_error("Can't write entry: %s", strerror(-r
));
1278 journal_file_post_change(s
->system_journal
);
1280 journal_file_close(s
->runtime_journal
);
1281 s
->runtime_journal
= NULL
;
1284 sd_id128_to_string(machine
, path
+ 17);
1285 rm_rf(path
, false, true, false);
1291 static void forward_syslog(Server
*s
, const void *buffer
, size_t length
, struct ucred
*ucred
, struct timeval
*tv
) {
1292 struct msghdr msghdr
;
1294 struct cmsghdr
*cmsg
;
1296 struct cmsghdr cmsghdr
;
1297 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1298 CMSG_SPACE(sizeof(struct timeval
))];
1300 union sockaddr_union sa
;
1307 iovec
.iov_base
= (void*) buffer
;
1308 iovec
.iov_len
= length
;
1309 msghdr
.msg_iov
= &iovec
;
1310 msghdr
.msg_iovlen
= 1;
1313 sa
.un
.sun_family
= AF_UNIX
;
1314 strncpy(sa
.un
.sun_path
, "/run/systemd/syslog", sizeof(sa
.un
.sun_path
));
1315 msghdr
.msg_name
= &sa
;
1316 msghdr
.msg_namelen
= offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
);
1319 msghdr
.msg_control
= &control
;
1320 msghdr
.msg_controllen
= sizeof(control
);
1322 cmsg
= CMSG_FIRSTHDR(&msghdr
);
1323 cmsg
->cmsg_level
= SOL_SOCKET
;
1324 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
1325 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
1326 memcpy(CMSG_DATA(cmsg
), ucred
, sizeof(struct ucred
));
1327 msghdr
.msg_controllen
= cmsg
->cmsg_len
;
1329 /* Forward the syslog message we received via /dev/log to
1330 * /run/systemd/syslog. Unfortunately we currently can't set
1331 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
1333 if (sendmsg(s
->syslog_fd
, &msghdr
, MSG_NOSIGNAL
) >= 0)
1336 if (errno
== ESRCH
) {
1339 /* Hmm, presumably the sender process vanished
1340 * by now, so let's fix it as good as we
1345 memcpy(CMSG_DATA(cmsg
), &u
, sizeof(struct ucred
));
1347 if (sendmsg(s
->syslog_fd
, &msghdr
, MSG_NOSIGNAL
) >= 0)
1351 log_debug("Failed to forward syslog message: %m");
1354 static int process_event(Server
*s
, struct epoll_event
*ev
) {
1357 if (ev
->data
.fd
== s
->signal_fd
) {
1358 struct signalfd_siginfo sfsi
;
1361 if (ev
->events
!= EPOLLIN
) {
1362 log_info("Got invalid event from epoll.");
1366 n
= read(s
->signal_fd
, &sfsi
, sizeof(sfsi
));
1367 if (n
!= sizeof(sfsi
)) {
1372 if (errno
== EINTR
|| errno
== EAGAIN
)
1378 if (sfsi
.ssi_signo
== SIGUSR1
) {
1379 server_flush_to_var(s
);
1383 log_debug("Received SIG%s", signal_to_string(sfsi
.ssi_signo
));
1386 } else if (ev
->data
.fd
== s
->native_fd
||
1387 ev
->data
.fd
== s
->syslog_fd
) {
1389 if (ev
->events
!= EPOLLIN
) {
1390 log_info("Got invalid event from epoll.");
1395 struct msghdr msghdr
;
1397 struct ucred
*ucred
= NULL
;
1398 struct timeval
*tv
= NULL
;
1399 struct cmsghdr
*cmsg
;
1401 struct cmsghdr cmsghdr
;
1402 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1403 CMSG_SPACE(sizeof(struct timeval
))];
1408 if (ioctl(ev
->data
.fd
, SIOCINQ
, &v
) < 0) {
1409 log_error("SIOCINQ failed: %m");
1416 if (s
->buffer_size
< (size_t) v
) {
1420 l
= MAX(LINE_MAX
+ (size_t) v
, s
->buffer_size
* 2);
1421 b
= realloc(s
->buffer
, l
+1);
1424 log_error("Couldn't increase buffer.");
1433 iovec
.iov_base
= s
->buffer
;
1434 iovec
.iov_len
= s
->buffer_size
;
1438 msghdr
.msg_iov
= &iovec
;
1439 msghdr
.msg_iovlen
= 1;
1440 msghdr
.msg_control
= &control
;
1441 msghdr
.msg_controllen
= sizeof(control
);
1443 n
= recvmsg(ev
->data
.fd
, &msghdr
, MSG_DONTWAIT
);
1446 if (errno
== EINTR
|| errno
== EAGAIN
)
1449 log_error("recvmsg() failed: %m");
1453 for (cmsg
= CMSG_FIRSTHDR(&msghdr
); cmsg
; cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
)) {
1455 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1456 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1457 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1458 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1459 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1460 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1461 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1462 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1465 if (ev
->data
.fd
== s
->syslog_fd
) {
1468 e
= memchr(s
->buffer
, '\n', n
);
1474 forward_syslog(s
, s
->buffer
, n
, ucred
, tv
);
1475 process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
);
1477 process_native_message(s
, s
->buffer
, n
, ucred
, tv
);
1482 } else if (ev
->data
.fd
== s
->stdout_fd
) {
1484 if (ev
->events
!= EPOLLIN
) {
1485 log_info("Got invalid event from epoll.");
1489 stdout_stream_new(s
);
1493 StdoutStream
*stream
;
1495 if ((ev
->events
|EPOLLIN
|EPOLLHUP
) != (EPOLLIN
|EPOLLHUP
)) {
1496 log_info("Got invalid event from epoll.");
1500 /* If it is none of the well-known fds, it must be an
1501 * stdout stream fd. Note that this is a bit ugly here
1502 * (since we rely that none of the well-known fds
1503 * could be interpreted as pointer), but nonetheless
1504 * safe, since the well-known fds would never get an
1505 * fd > 4096, i.e. beyond the first memory page */
1507 stream
= ev
->data
.ptr
;
1509 if (stdout_stream_process(stream
) <= 0)
1510 stdout_stream_free(stream
);
1515 log_error("Unknown event.");
1519 static int open_syslog_socket(Server
*s
) {
1520 union sockaddr_union sa
;
1522 struct epoll_event ev
;
1527 if (s
->syslog_fd
< 0) {
1529 s
->syslog_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0);
1530 if (s
->syslog_fd
< 0) {
1531 log_error("socket() failed: %m");
1536 sa
.un
.sun_family
= AF_UNIX
;
1537 strncpy(sa
.un
.sun_path
, "/dev/log", sizeof(sa
.un
.sun_path
));
1539 unlink(sa
.un
.sun_path
);
1541 r
= bind(s
->syslog_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1543 log_error("bind() failed: %m");
1547 chmod(sa
.un
.sun_path
, 0666);
1551 r
= setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_PASSCRED
, &one
, sizeof(one
));
1553 log_error("SO_PASSCRED failed: %m");
1558 r
= setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_TIMESTAMP
, &one
, sizeof(one
));
1560 log_error("SO_TIMESTAMP failed: %m");
1564 /* Since we use the same socket for forwarding this to some
1565 * other syslog implementation, make sure we don't hang
1567 timeval_store(&tv
, SYSLOG_TIMEOUT_USEC
);
1568 if (setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_SNDTIMEO
, &tv
, sizeof(tv
)) < 0) {
1569 log_error("SO_SNDTIMEO failed: %m");
1574 ev
.events
= EPOLLIN
;
1575 ev
.data
.fd
= s
->syslog_fd
;
1576 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->syslog_fd
, &ev
) < 0) {
1577 log_error("Failed to add syslog server fd to epoll object: %m");
1584 static int open_native_socket(Server
*s
) {
1585 union sockaddr_union sa
;
1587 struct epoll_event ev
;
1591 if (s
->native_fd
< 0) {
1593 s
->native_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0);
1594 if (s
->native_fd
< 0) {
1595 log_error("socket() failed: %m");
1600 sa
.un
.sun_family
= AF_UNIX
;
1601 strncpy(sa
.un
.sun_path
, "/run/systemd/journal", sizeof(sa
.un
.sun_path
));
1603 unlink(sa
.un
.sun_path
);
1605 r
= bind(s
->native_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1607 log_error("bind() failed: %m");
1611 chmod(sa
.un
.sun_path
, 0666);
1615 r
= setsockopt(s
->native_fd
, SOL_SOCKET
, SO_PASSCRED
, &one
, sizeof(one
));
1617 log_error("SO_PASSCRED failed: %m");
1622 r
= setsockopt(s
->native_fd
, SOL_SOCKET
, SO_TIMESTAMP
, &one
, sizeof(one
));
1624 log_error("SO_TIMESTAMP failed: %m");
1629 ev
.events
= EPOLLIN
;
1630 ev
.data
.fd
= s
->native_fd
;
1631 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->native_fd
, &ev
) < 0) {
1632 log_error("Failed to add native server fd to epoll object: %m");
1639 static int open_stdout_socket(Server
*s
) {
1640 union sockaddr_union sa
;
1642 struct epoll_event ev
;
1646 if (s
->stdout_fd
< 0) {
1648 s
->stdout_fd
= socket(AF_UNIX
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
1649 if (s
->stdout_fd
< 0) {
1650 log_error("socket() failed: %m");
1655 sa
.un
.sun_family
= AF_UNIX
;
1656 strncpy(sa
.un
.sun_path
, "/run/systemd/stdout", sizeof(sa
.un
.sun_path
));
1658 unlink(sa
.un
.sun_path
);
1660 r
= bind(s
->stdout_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1662 log_error("bind() failed: %m");
1666 chmod(sa
.un
.sun_path
, 0666);
1668 if (listen(s
->stdout_fd
, SOMAXCONN
) < 0) {
1669 log_error("liste() failed: %m");
1675 ev
.events
= EPOLLIN
;
1676 ev
.data
.fd
= s
->stdout_fd
;
1677 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->stdout_fd
, &ev
) < 0) {
1678 log_error("Failed to add stdout server fd to epoll object: %m");
1685 static int open_signalfd(Server
*s
) {
1687 struct epoll_event ev
;
1691 assert_se(sigemptyset(&mask
) == 0);
1692 sigset_add_many(&mask
, SIGINT
, SIGTERM
, SIGUSR1
, -1);
1693 assert_se(sigprocmask(SIG_SETMASK
, &mask
, NULL
) == 0);
1695 s
->signal_fd
= signalfd(-1, &mask
, SFD_NONBLOCK
|SFD_CLOEXEC
);
1696 if (s
->signal_fd
< 0) {
1697 log_error("signalfd(): %m");
1702 ev
.events
= EPOLLIN
;
1703 ev
.data
.fd
= s
->signal_fd
;
1705 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->signal_fd
, &ev
) < 0) {
1706 log_error("epoll_ctl(): %m");
1713 static int server_init(Server
*s
) {
1719 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->signal_fd
= s
->epoll_fd
= -1;
1720 s
->metrics
.max_size
= DEFAULT_MAX_SIZE
;
1721 s
->metrics
.min_size
= DEFAULT_MIN_SIZE
;
1722 s
->metrics
.keep_free
= DEFAULT_KEEP_FREE
;
1723 s
->metrics
.max_use
= DEFAULT_MAX_USE
;
1726 s
->user_journals
= hashmap_new(trivial_hash_func
, trivial_compare_func
);
1727 if (!s
->user_journals
) {
1728 log_error("Out of memory.");
1732 s
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
1733 if (s
->epoll_fd
< 0) {
1734 log_error("Failed to create epoll object: %m");
1738 n
= sd_listen_fds(true);
1740 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n
));
1744 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1746 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/native", 0) > 0) {
1748 if (s
->native_fd
>= 0) {
1749 log_error("Too many native sockets passed.");
1755 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/stdout", 0) > 0) {
1757 if (s
->stdout_fd
>= 0) {
1758 log_error("Too many stdout sockets passed.");
1764 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0) {
1766 if (s
->syslog_fd
>= 0) {
1767 log_error("Too many /dev/log sockets passed.");
1774 log_error("Unknown socket passed.");
1779 r
= open_syslog_socket(s
);
1783 r
= open_native_socket(s
);
1787 r
= open_stdout_socket(s
);
1791 r
= system_journal_open(s
);
1795 r
= open_signalfd(s
);
1799 s
->rate_limit
= journal_rate_limit_new(DEFAULT_RATE_LIMIT_INTERVAL
, DEFAULT_RATE_LIMIT_BURST
);
1806 static void server_done(Server
*s
) {
1810 while (s
->stdout_streams
)
1811 stdout_stream_free(s
->stdout_streams
);
1813 if (s
->system_journal
)
1814 journal_file_close(s
->system_journal
);
1816 if (s
->runtime_journal
)
1817 journal_file_close(s
->runtime_journal
);
1819 while ((f
= hashmap_steal_first(s
->user_journals
)))
1820 journal_file_close(f
);
1822 hashmap_free(s
->user_journals
);
1824 if (s
->epoll_fd
>= 0)
1825 close_nointr_nofail(s
->epoll_fd
);
1827 if (s
->signal_fd
>= 0)
1828 close_nointr_nofail(s
->signal_fd
);
1830 if (s
->syslog_fd
>= 0)
1831 close_nointr_nofail(s
->syslog_fd
);
1833 if (s
->native_fd
>= 0)
1834 close_nointr_nofail(s
->native_fd
);
1836 if (s
->stdout_fd
>= 0)
1837 close_nointr_nofail(s
->stdout_fd
);
1840 journal_rate_limit_free(s
->rate_limit
);
1845 int main(int argc
, char *argv
[]) {
1849 /* if (getppid() != 1) { */
1850 /* log_error("This program should be invoked by init only."); */
1851 /* return EXIT_FAILURE; */
1855 log_error("This program does not take arguments.");
1856 return EXIT_FAILURE
;
1859 log_set_target(LOG_TARGET_CONSOLE
);
1860 log_parse_environment();
1865 r
= server_init(&server
);
1869 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1873 "STATUS=Processing requests...");
1875 server_vacuum(&server
);
1876 server_flush_to_var(&server
);
1879 struct epoll_event event
;
1881 r
= epoll_wait(server
.epoll_fd
, &event
, 1, -1);
1887 log_error("epoll_wait() failed: %m");
1893 r
= process_event(&server
, &event
);
1900 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1904 "STATUS=Shutting down...");
1906 server_done(&server
);
1908 return r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;