1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
25 #include <sys/signalfd.h>
29 #include <acl/libacl.h>
31 #include <sys/ioctl.h>
32 #include <linux/sockios.h>
33 #include <sys/statvfs.h>
36 #include "journal-file.h"
37 #include "sd-daemon.h"
38 #include "socket-util.h"
40 #include "cgroup-util.h"
42 #include "journal-rate-limit.h"
43 #include "sd-journal.h"
45 #include "journal-internal.h"
46 #include "conf-parser.h"
49 #define USER_JOURNALS_MAX 1024
50 #define STDOUT_STREAMS_MAX 4096
52 #define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
53 #define DEFAULT_RATE_LIMIT_BURST 200
55 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
57 #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC)
59 #define SYSLOG_TIMEOUT_USEC (5*USEC_PER_SEC)
61 typedef enum StdoutStreamState
{
63 STDOUT_STREAM_PRIORITY
,
64 STDOUT_STREAM_PRIORITY_PREFIX
,
65 STDOUT_STREAM_TEE_CONSOLE
,
71 StdoutStreamState state
;
79 bool priority_prefix
:1;
82 char buffer
[LINE_MAX
+1];
85 LIST_FIELDS(StdoutStream
, stdout_stream
);
88 static int server_flush_to_var(Server
*s
);
90 static uint64_t available_space(Server
*s
) {
95 uint64_t sum
= 0, avail
= 0, ss_avail
= 0;
101 ts
= now(CLOCK_MONOTONIC
);
103 if (s
->cached_available_space_timestamp
+ RECHECK_AVAILABLE_SPACE_USEC
> ts
)
104 return s
->cached_available_space
;
106 r
= sd_id128_get_machine(&machine
);
110 if (s
->system_journal
) {
111 f
= "/var/log/journal/";
112 m
= &s
->system_metrics
;
114 f
= "/run/log/journal/";
115 m
= &s
->runtime_metrics
;
120 p
= strappend(f
, sd_id128_to_string(machine
, ids
));
130 if (fstatvfs(dirfd(d
), &ss
) < 0)
135 struct dirent buf
, *de
;
138 k
= readdir_r(d
, &buf
, &de
);
147 if (!dirent_is_file_with_suffix(de
, ".journal"))
150 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
153 sum
+= (uint64_t) st
.st_blocks
* (uint64_t) st
.st_blksize
;
156 avail
= sum
>= m
->max_use
? 0 : m
->max_use
- sum
;
158 ss_avail
= ss
.f_bsize
* ss
.f_bavail
;
160 ss_avail
= ss_avail
< m
->keep_free
? 0 : ss_avail
- m
->keep_free
;
162 if (ss_avail
< avail
)
165 s
->cached_available_space
= avail
;
166 s
->cached_available_space_timestamp
= ts
;
174 static void fix_perms(JournalFile
*f
, uid_t uid
) {
177 acl_permset_t permset
;
182 r
= fchmod_and_fchown(f
->fd
, 0640, 0, 0);
184 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f
->path
, strerror(-r
));
189 acl
= acl_get_fd(f
->fd
);
191 log_warning("Failed to read ACL on %s, ignoring: %m", f
->path
);
195 r
= acl_find_uid(acl
, uid
, &entry
);
198 if (acl_create_entry(&acl
, &entry
) < 0 ||
199 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
200 acl_set_qualifier(entry
, &uid
) < 0) {
201 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
206 if (acl_get_permset(entry
, &permset
) < 0 ||
207 acl_add_perm(permset
, ACL_READ
) < 0 ||
208 acl_calc_mask(&acl
) < 0) {
209 log_warning("Failed to patch ACL on %s, ignoring: %m", f
->path
);
213 if (acl_set_fd(f
->fd
, acl
) < 0)
214 log_warning("Failed to set ACL on %s, ignoring: %m", f
->path
);
220 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
229 /* We split up user logs only on /var, not on /run. If the
230 * runtime file is open, we write to it exclusively, in order
231 * to guarantee proper order as soon as we flush /run to
232 * /var and close the runtime file. */
234 if (s
->runtime_journal
)
235 return s
->runtime_journal
;
238 return s
->system_journal
;
240 r
= sd_id128_get_machine(&machine
);
242 return s
->system_journal
;
244 f
= hashmap_get(s
->user_journals
, UINT32_TO_PTR(uid
));
248 if (asprintf(&p
, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine
, ids
), (unsigned long) uid
) < 0)
249 return s
->system_journal
;
251 while (hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
252 /* Too many open? Then let's close one */
253 f
= hashmap_steal_first(s
->user_journals
);
255 journal_file_close(f
);
258 r
= journal_file_open(p
, O_RDWR
|O_CREAT
, 0640, s
->system_journal
, &f
);
262 return s
->system_journal
;
265 f
->metrics
= s
->system_metrics
;
266 f
->compress
= s
->compress
;
268 r
= hashmap_put(s
->user_journals
, UINT32_TO_PTR(uid
), f
);
270 journal_file_close(f
);
271 return s
->system_journal
;
277 static void server_rotate(Server
*s
) {
283 log_info("Rotating...");
285 if (s
->runtime_journal
) {
286 r
= journal_file_rotate(&s
->runtime_journal
);
288 log_error("Failed to rotate %s: %s", s
->runtime_journal
->path
, strerror(-r
));
291 if (s
->system_journal
) {
292 r
= journal_file_rotate(&s
->system_journal
);
294 log_error("Failed to rotate %s: %s", s
->system_journal
->path
, strerror(-r
));
297 HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
298 r
= journal_file_rotate(&f
);
300 log_error("Failed to rotate %s: %s", f
->path
, strerror(-r
));
302 hashmap_replace(s
->user_journals
, k
, f
);
306 static void server_vacuum(Server
*s
) {
312 log_info("Vacuuming...");
314 r
= sd_id128_get_machine(&machine
);
316 log_error("Failed to get machine ID: %s", strerror(-r
));
320 sd_id128_to_string(machine
, ids
);
322 if (s
->system_journal
) {
323 if (asprintf(&p
, "/var/log/journal/%s", ids
) < 0) {
324 log_error("Out of memory.");
328 r
= journal_directory_vacuum(p
, s
->system_metrics
.max_use
, s
->system_metrics
.keep_free
);
329 if (r
< 0 && r
!= -ENOENT
)
330 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
335 if (s
->runtime_journal
) {
336 if (asprintf(&p
, "/run/log/journal/%s", ids
) < 0) {
337 log_error("Out of memory.");
341 r
= journal_directory_vacuum(p
, s
->runtime_metrics
.max_use
, s
->runtime_metrics
.keep_free
);
342 if (r
< 0 && r
!= -ENOENT
)
343 log_error("Failed to vacuum %s: %s", p
, strerror(-r
));
347 s
->cached_available_space_timestamp
= 0;
350 static char *shortened_cgroup_path(pid_t pid
) {
352 char *process_path
, *init_path
, *path
;
356 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, pid
, &process_path
);
360 r
= cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, 1, &init_path
);
366 if (endswith(init_path
, "/system"))
367 init_path
[strlen(init_path
) - 7] = 0;
368 else if (streq(init_path
, "/"))
371 if (startswith(process_path
, init_path
)) {
374 p
= strdup(process_path
+ strlen(init_path
));
392 static void dispatch_message_real(Server
*s
,
393 struct iovec
*iovec
, unsigned n
, unsigned m
,
395 struct timeval
*tv
) {
397 char *pid
= NULL
, *uid
= NULL
, *gid
= NULL
,
398 *source_time
= NULL
, *boot_id
= NULL
, *machine_id
= NULL
,
399 *comm
= NULL
, *cmdline
= NULL
, *hostname
= NULL
,
400 *audit_session
= NULL
, *audit_loginuid
= NULL
,
401 *exe
= NULL
, *cgroup
= NULL
, *session
= NULL
,
402 *owner_uid
= NULL
, *unit
= NULL
;
408 uid_t loginuid
= 0, realuid
= 0;
410 bool vacuumed
= false;
421 realuid
= ucred
->uid
;
423 if (asprintf(&pid
, "_PID=%lu", (unsigned long) ucred
->pid
) >= 0)
424 IOVEC_SET_STRING(iovec
[n
++], pid
);
426 if (asprintf(&uid
, "_UID=%lu", (unsigned long) ucred
->uid
) >= 0)
427 IOVEC_SET_STRING(iovec
[n
++], uid
);
429 if (asprintf(&gid
, "_GID=%lu", (unsigned long) ucred
->gid
) >= 0)
430 IOVEC_SET_STRING(iovec
[n
++], gid
);
432 r
= get_process_comm(ucred
->pid
, &t
);
434 comm
= strappend("_COMM=", t
);
438 IOVEC_SET_STRING(iovec
[n
++], comm
);
441 r
= get_process_exe(ucred
->pid
, &t
);
443 exe
= strappend("_EXE=", t
);
447 IOVEC_SET_STRING(iovec
[n
++], exe
);
450 r
= get_process_cmdline(ucred
->pid
, LINE_MAX
, false, &t
);
452 cmdline
= strappend("_CMDLINE=", t
);
456 IOVEC_SET_STRING(iovec
[n
++], cmdline
);
459 r
= audit_session_from_pid(ucred
->pid
, &audit
);
461 if (asprintf(&audit_session
, "_AUDIT_SESSION=%lu", (unsigned long) audit
) >= 0)
462 IOVEC_SET_STRING(iovec
[n
++], audit_session
);
464 r
= audit_loginuid_from_pid(ucred
->pid
, &loginuid
);
466 if (asprintf(&audit_loginuid
, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid
) >= 0)
467 IOVEC_SET_STRING(iovec
[n
++], audit_loginuid
);
469 t
= shortened_cgroup_path(ucred
->pid
);
471 cgroup
= strappend("_SYSTEMD_CGROUP=", t
);
475 IOVEC_SET_STRING(iovec
[n
++], cgroup
);
478 if (sd_pid_get_session(ucred
->pid
, &t
) >= 0) {
479 session
= strappend("_SYSTEMD_SESSION=", t
);
483 IOVEC_SET_STRING(iovec
[n
++], session
);
486 if (sd_pid_get_unit(ucred
->pid
, &t
) >= 0) {
487 unit
= strappend("_SYSTEMD_UNIT=", t
);
491 IOVEC_SET_STRING(iovec
[n
++], unit
);
494 if (sd_pid_get_owner_uid(ucred
->uid
, &owner
) >= 0)
495 if (asprintf(&owner_uid
, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner
) >= 0)
496 IOVEC_SET_STRING(iovec
[n
++], owner_uid
);
500 if (asprintf(&source_time
, "_SOURCE_REALTIME_TIMESTAMP=%llu",
501 (unsigned long long) timeval_load(tv
)) >= 0)
502 IOVEC_SET_STRING(iovec
[n
++], source_time
);
505 /* Note that strictly speaking storing the boot id here is
506 * redundant since the entry includes this in-line
507 * anyway. However, we need this indexed, too. */
508 r
= sd_id128_get_boot(&id
);
510 if (asprintf(&boot_id
, "_BOOT_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
511 IOVEC_SET_STRING(iovec
[n
++], boot_id
);
513 r
= sd_id128_get_machine(&id
);
515 if (asprintf(&machine_id
, "_MACHINE_ID=%s", sd_id128_to_string(id
, idbuf
)) >= 0)
516 IOVEC_SET_STRING(iovec
[n
++], machine_id
);
518 t
= gethostname_malloc();
520 hostname
= strappend("_HOSTNAME=", t
);
523 IOVEC_SET_STRING(iovec
[n
++], hostname
);
528 server_flush_to_var(s
);
531 f
= find_journal(s
, realuid
== 0 ? 0 : loginuid
);
533 log_warning("Dropping message, as we can't find a place to store the data.");
535 r
= journal_file_append_entry(f
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
537 if (r
== -E2BIG
&& !vacuumed
) {
538 log_info("Allocation limit reached.");
544 log_info("Retrying write.");
549 log_error("Failed to write entry, ignoring: %s", strerror(-r
));
563 free(audit_loginuid
);
570 static void dispatch_message(Server
*s
,
571 struct iovec
*iovec
, unsigned n
, unsigned m
,
576 char *path
= NULL
, *c
;
579 assert(iovec
|| n
== 0);
587 path
= shortened_cgroup_path(ucred
->pid
);
591 /* example: /user/lennart/3/foobar
592 * /system/dbus.service/foobar
594 * So let's cut of everything past the third /, since that is
595 * wher user directories start */
597 c
= strchr(path
, '/');
599 c
= strchr(c
+1, '/');
601 c
= strchr(c
+1, '/');
607 rl
= journal_rate_limit_test(s
->rate_limit
, path
, priority
, available_space(s
));
616 char suppress_message
[LINE_MAX
];
617 struct iovec suppress_iovec
[18];
619 /* Write a suppression message if we suppressed something */
621 snprintf(suppress_message
, sizeof(suppress_message
), "MESSAGE=Suppressed %u messages from %s", rl
- 1, path
);
622 char_array_0(suppress_message
);
624 IOVEC_SET_STRING(suppress_iovec
[j
++], "PRIORITY=5");
625 IOVEC_SET_STRING(suppress_iovec
[j
++], suppress_message
);
627 dispatch_message_real(s
, suppress_iovec
, j
, ELEMENTSOF(suppress_iovec
), NULL
, NULL
);
633 dispatch_message_real(s
, iovec
, n
, m
, ucred
, tv
);
636 static void process_syslog_message(Server
*s
, const char *buf
, struct ucred
*ucred
, struct timeval
*tv
) {
637 char *message
= NULL
, *syslog_priority
= NULL
, *syslog_facility
= NULL
;
638 struct iovec iovec
[19];
640 int priority
= LOG_USER
| LOG_INFO
;
645 parse_syslog_priority((char**) &buf
, &priority
);
646 skip_syslog_date((char**) &buf
);
648 if (asprintf(&syslog_priority
, "PRIORITY=%i", priority
& LOG_PRIMASK
) >= 0)
649 IOVEC_SET_STRING(iovec
[n
++], syslog_priority
);
651 if (asprintf(&syslog_facility
, "SYSLOG_FACILITY=%i", LOG_FAC(priority
)) >= 0)
652 IOVEC_SET_STRING(iovec
[n
++], syslog_facility
);
654 message
= strappend("MESSAGE=", buf
);
656 IOVEC_SET_STRING(iovec
[n
++], message
);
658 dispatch_message(s
, iovec
, n
, ELEMENTSOF(iovec
), ucred
, tv
, priority
& LOG_PRIMASK
);
661 free(syslog_facility
);
662 free(syslog_priority
);
665 static bool valid_user_field(const char *p
, size_t l
) {
668 /* We kinda enforce POSIX syntax recommendations for
669 environment variables here, but make a couple of additional
672 http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
674 /* No empty field names */
678 /* Don't allow names longer than 64 chars */
682 /* Variables starting with an underscore are protected */
686 /* Don't allow digits as first character */
687 if (p
[0] >= '0' && p
[0] <= '9')
690 /* Only allow A-Z0-9 and '_' */
691 for (a
= p
; a
< p
+ l
; a
++)
692 if (!((*a
>= 'A' && *a
<= 'Z') ||
693 (*a
>= '0' && *a
<= '9') ||
700 static void process_native_message(Server
*s
, const void *buffer
, size_t buffer_size
, struct ucred
*ucred
, struct timeval
*tv
) {
701 struct iovec
*iovec
= NULL
;
702 unsigned n
= 0, m
= 0, j
;
705 int priority
= LOG_INFO
;
708 assert(buffer
|| n
== 0);
711 remaining
= buffer_size
;
713 while (remaining
> 0) {
716 e
= memchr(p
, '\n', remaining
);
719 /* Trailing noise, let's ignore it, and flush what we collected */
720 log_debug("Received message with trailing noise, ignoring.");
725 /* Entry separator */
726 dispatch_message(s
, iovec
, n
, m
, ucred
, tv
, priority
);
735 if (*p
== '.' || *p
== '#') {
736 /* Ignore control commands for now, and
738 remaining
-= (e
- p
) + 1;
743 /* A property follows */
749 u
= MAX((n
+16U) * 2U, 4U);
750 c
= realloc(iovec
, u
* sizeof(struct iovec
));
752 log_error("Out of memory");
760 q
= memchr(p
, '=', e
- p
);
762 if (valid_user_field(p
, q
- p
)) {
763 /* If the field name starts with an
764 * underscore, skip the variable,
765 * since that indidates a trusted
767 iovec
[n
].iov_base
= (char*) p
;
768 iovec
[n
].iov_len
= e
- p
;
771 /* We need to determine the priority
772 * of this entry for the rate limiting
775 memcmp(p
, "PRIORITY=", 10) == 0 &&
778 priority
= p
[10] - '0';
781 remaining
-= (e
- p
) + 1;
788 if (remaining
< e
- p
+ 1 + sizeof(uint64_t) + 1) {
789 log_debug("Failed to parse message, ignoring.");
793 memcpy(&l
, e
+ 1, sizeof(uint64_t));
796 if (remaining
< e
- p
+ 1 + sizeof(uint64_t) + l
+ 1 ||
797 e
[1+sizeof(uint64_t)+l
] != '\n') {
798 log_debug("Failed to parse message, ignoring.");
802 k
= malloc((e
- p
) + 1 + l
);
804 log_error("Out of memory");
810 memcpy(k
+ (e
- p
) + 1, e
+ 1 + sizeof(uint64_t), l
);
812 if (valid_user_field(p
, e
- p
)) {
813 iovec
[n
].iov_base
= k
;
814 iovec
[n
].iov_len
= (e
- p
) + 1 + l
;
819 remaining
-= (e
- p
) + 1 + sizeof(uint64_t) + l
+ 1;
820 p
= e
+ 1 + sizeof(uint64_t) + l
+ 1;
824 dispatch_message(s
, iovec
, n
, m
, ucred
, tv
, priority
);
826 for (j
= 0; j
< n
; j
++)
827 if (iovec
[j
].iov_base
< buffer
||
828 (const uint8_t*) iovec
[j
].iov_base
>= (const uint8_t*) buffer
+ buffer_size
)
829 free(iovec
[j
].iov_base
);
832 static int stdout_stream_log(StdoutStream
*s
, const char *p
, size_t l
) {
833 struct iovec iovec
[18];
834 char *message
= NULL
, *syslog_priority
= NULL
;
842 priority
= s
->priority
;
844 if (s
->priority_prefix
&&
847 p
[1] >= '0' && p
[1] <= '7' &&
850 priority
= p
[1] - '0';
858 if (asprintf(&syslog_priority
, "PRIORITY=%i", priority
) >= 0)
859 IOVEC_SET_STRING(iovec
[n
++], syslog_priority
);
861 tag_len
= s
->tag
? strlen(s
->tag
) + 2: 0;
862 message
= malloc(8 + tag_len
+ l
);
864 memcpy(message
, "MESSAGE=", 8);
867 memcpy(message
+8, s
->tag
, tag_len
-2);
868 memcpy(message
+8+tag_len
-2, ": ", 2);
871 memcpy(message
+8+tag_len
, p
, l
);
872 iovec
[n
].iov_base
= message
;
873 iovec
[n
].iov_len
= 8+tag_len
+l
;
877 dispatch_message(s
->server
, iovec
, n
, ELEMENTSOF(iovec
), &s
->ucred
, NULL
, priority
);
879 if (s
->tee_console
) {
882 console
= open_terminal("/dev/console", O_WRONLY
|O_NOCTTY
|O_CLOEXEC
);
886 IOVEC_SET_STRING(iovec
[n
++], s
->tag
);
887 IOVEC_SET_STRING(iovec
[n
++], ": ");
890 iovec
[n
].iov_base
= (void*) p
;
891 iovec
[n
].iov_len
= l
;
894 IOVEC_SET_STRING(iovec
[n
++], (char*) "\n");
896 writev(console
, iovec
, n
);
901 free(syslog_priority
);
906 static int stdout_stream_line(StdoutStream
*s
, const char *p
, size_t l
) {
910 while (l
> 0 && strchr(WHITESPACE
, *p
)) {
915 while (l
> 0 && strchr(WHITESPACE
, *(p
+l
-1)))
920 case STDOUT_STREAM_TAG
:
923 s
->tag
= strndup(p
, l
);
925 log_error("Out of memory");
930 s
->state
= STDOUT_STREAM_PRIORITY
;
933 case STDOUT_STREAM_PRIORITY
:
934 if (l
!= 1 || *p
< '0' || *p
> '7') {
935 log_warning("Failed to parse log priority line.");
939 s
->priority
= *p
- '0';
940 s
->state
= STDOUT_STREAM_PRIORITY_PREFIX
;
943 case STDOUT_STREAM_PRIORITY_PREFIX
:
944 if (l
!= 1 || *p
< '0' || *p
> '1') {
945 log_warning("Failed to parse priority prefix line.");
949 s
->priority_prefix
= *p
- '0';
950 s
->state
= STDOUT_STREAM_TEE_CONSOLE
;
953 case STDOUT_STREAM_TEE_CONSOLE
:
954 if (l
!= 1 || *p
< '0' || *p
> '1') {
955 log_warning("Failed to parse tee to console line.");
959 s
->tee_console
= *p
- '0';
960 s
->state
= STDOUT_STREAM_RUNNING
;
963 case STDOUT_STREAM_RUNNING
:
964 return stdout_stream_log(s
, p
, l
);
967 assert_not_reached("Unknown stream state");
970 static int stdout_stream_scan(StdoutStream
*s
, bool force_flush
) {
978 remaining
= s
->length
;
983 end
= memchr(p
, '\n', remaining
);
985 if (remaining
>= LINE_MAX
) {
993 r
= stdout_stream_line(s
, p
, end
- p
);
1001 if (force_flush
&& remaining
> 0) {
1002 r
= stdout_stream_line(s
, p
, remaining
);
1010 if (p
> s
->buffer
) {
1011 memmove(s
->buffer
, p
, remaining
);
1012 s
->length
= remaining
;
1018 static int stdout_stream_process(StdoutStream
*s
) {
1024 l
= read(s
->fd
, s
->buffer
+s
->length
, sizeof(s
->buffer
)-1-s
->length
);
1027 if (errno
== EAGAIN
)
1030 log_warning("Failed to read from stream: %m");
1035 r
= stdout_stream_scan(s
, true);
1043 r
= stdout_stream_scan(s
, false);
1051 static void stdout_stream_free(StdoutStream
*s
) {
1055 assert(s
->server
->n_stdout_streams
> 0);
1056 s
->server
->n_stdout_streams
--;
1057 LIST_REMOVE(StdoutStream
, stdout_stream
, s
->server
->stdout_streams
, s
);
1062 epoll_ctl(s
->server
->epoll_fd
, EPOLL_CTL_DEL
, s
->fd
, NULL
);
1064 close_nointr_nofail(s
->fd
);
1071 static int stdout_stream_new(Server
*s
) {
1072 StdoutStream
*stream
;
1075 struct epoll_event ev
;
1079 fd
= accept4(s
->stdout_fd
, NULL
, NULL
, SOCK_NONBLOCK
|SOCK_CLOEXEC
);
1081 if (errno
== EAGAIN
)
1084 log_error("Failed to accept stdout connection: %m");
1088 if (s
->n_stdout_streams
>= STDOUT_STREAMS_MAX
) {
1089 log_warning("Too many stdout streams, refusing connection.");
1090 close_nointr_nofail(fd
);
1094 stream
= new0(StdoutStream
, 1);
1096 log_error("Out of memory.");
1097 close_nointr_nofail(fd
);
1103 len
= sizeof(stream
->ucred
);
1104 if (getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &stream
->ucred
, &len
) < 0) {
1105 log_error("Failed to determine peer credentials: %m");
1110 if (shutdown(fd
, SHUT_WR
) < 0) {
1111 log_error("Failed to shutdown writing side of socket: %m");
1117 ev
.data
.ptr
= stream
;
1118 ev
.events
= EPOLLIN
;
1119 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
) < 0) {
1120 log_error("Failed to add stream to event loop: %m");
1126 LIST_PREPEND(StdoutStream
, stdout_stream
, s
->stdout_streams
, stream
);
1127 s
->n_stdout_streams
++;
1132 stdout_stream_free(stream
);
1136 static int system_journal_open(Server
*s
) {
1142 r
= sd_id128_get_machine(&machine
);
1146 sd_id128_to_string(machine
, ids
);
1148 if (!s
->system_journal
) {
1150 /* First try to create the machine path, but not the prefix */
1151 fn
= strappend("/var/log/journal/", ids
);
1154 (void) mkdir(fn
, 0755);
1157 /* The create the system journal file */
1158 fn
= join("/var/log/journal/", ids
, "/system.journal", NULL
);
1162 r
= journal_file_open(fn
, O_RDWR
|O_CREAT
, 0640, NULL
, &s
->system_journal
);
1166 journal_default_metrics(&s
->system_metrics
, s
->system_journal
->fd
);
1168 s
->system_journal
->metrics
= s
->system_metrics
;
1169 s
->system_journal
->compress
= s
->compress
;
1171 fix_perms(s
->system_journal
, 0);
1174 if (r
!= -ENOENT
&& r
!= -EROFS
)
1175 log_warning("Failed to open system journal: %s", strerror(-r
));
1181 if (!s
->runtime_journal
) {
1183 fn
= join("/run/log/journal/", ids
, "/system.journal", NULL
);
1187 if (s
->system_journal
) {
1189 /* Try to open the runtime journal, but only
1190 * if it already exists, so that we can flush
1191 * it into the system journal */
1193 r
= journal_file_open(fn
, O_RDWR
, 0640, NULL
, &s
->runtime_journal
);
1198 log_warning("Failed to open runtime journal: %s", strerror(-r
));
1205 /* OK, we really need the runtime journal, so create
1206 * it if necessary. */
1208 (void) mkdir_parents(fn
, 0755);
1209 r
= journal_file_open(fn
, O_RDWR
|O_CREAT
, 0640, NULL
, &s
->runtime_journal
);
1213 log_error("Failed to open runtime journal: %s", strerror(-r
));
1218 if (s
->runtime_journal
) {
1219 journal_default_metrics(&s
->runtime_metrics
, s
->runtime_journal
->fd
);
1221 s
->runtime_journal
->metrics
= s
->runtime_metrics
;
1222 s
->runtime_journal
->compress
= s
->compress
;
1224 fix_perms(s
->runtime_journal
, 0);
1231 static int server_flush_to_var(Server
*s
) {
1232 char path
[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
1241 if (!s
->runtime_journal
)
1244 ts
= now(CLOCK_MONOTONIC
);
1245 if (s
->var_available_timestamp
+ RECHECK_VAR_AVAILABLE_USEC
> ts
)
1248 s
->var_available_timestamp
= ts
;
1250 system_journal_open(s
);
1252 if (!s
->system_journal
)
1255 r
= sd_id128_get_machine(&machine
);
1257 log_error("Failed to get machine id: %s", strerror(-r
));
1261 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1263 log_error("Failed to read runtime journal: %s", strerror(-r
));
1267 SD_JOURNAL_FOREACH(j
) {
1270 f
= j
->current_file
;
1271 assert(f
&& f
->current_offset
> 0);
1273 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1275 log_error("Can't read entry: %s", strerror(-r
));
1279 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1281 log_info("Allocation limit reached.");
1283 journal_file_post_change(s
->system_journal
);
1287 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1291 log_error("Can't write entry: %s", strerror(-r
));
1297 journal_file_post_change(s
->system_journal
);
1299 journal_file_close(s
->runtime_journal
);
1300 s
->runtime_journal
= NULL
;
1303 sd_id128_to_string(machine
, path
+ 17);
1304 rm_rf(path
, false, true, false);
1310 static void forward_syslog(Server
*s
, const void *buffer
, size_t length
, struct ucred
*ucred
, struct timeval
*tv
) {
1311 struct msghdr msghdr
;
1313 struct cmsghdr
*cmsg
;
1315 struct cmsghdr cmsghdr
;
1316 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1317 CMSG_SPACE(sizeof(struct timeval
))];
1319 union sockaddr_union sa
;
1326 iovec
.iov_base
= (void*) buffer
;
1327 iovec
.iov_len
= length
;
1328 msghdr
.msg_iov
= &iovec
;
1329 msghdr
.msg_iovlen
= 1;
1332 sa
.un
.sun_family
= AF_UNIX
;
1333 strncpy(sa
.un
.sun_path
, "/run/systemd/syslog", sizeof(sa
.un
.sun_path
));
1334 msghdr
.msg_name
= &sa
;
1335 msghdr
.msg_namelen
= offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
);
1338 msghdr
.msg_control
= &control
;
1339 msghdr
.msg_controllen
= sizeof(control
);
1341 cmsg
= CMSG_FIRSTHDR(&msghdr
);
1342 cmsg
->cmsg_level
= SOL_SOCKET
;
1343 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
1344 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
1345 memcpy(CMSG_DATA(cmsg
), ucred
, sizeof(struct ucred
));
1346 msghdr
.msg_controllen
= cmsg
->cmsg_len
;
1348 /* Forward the syslog message we received via /dev/log to
1349 * /run/systemd/syslog. Unfortunately we currently can't set
1350 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
1352 if (sendmsg(s
->syslog_fd
, &msghdr
, MSG_NOSIGNAL
) >= 0)
1355 if (errno
== ESRCH
) {
1358 /* Hmm, presumably the sender process vanished
1359 * by now, so let's fix it as good as we
1364 memcpy(CMSG_DATA(cmsg
), &u
, sizeof(struct ucred
));
1366 if (sendmsg(s
->syslog_fd
, &msghdr
, MSG_NOSIGNAL
) >= 0)
1370 log_debug("Failed to forward syslog message: %m");
1373 static int process_event(Server
*s
, struct epoll_event
*ev
) {
1376 if (ev
->data
.fd
== s
->signal_fd
) {
1377 struct signalfd_siginfo sfsi
;
1380 if (ev
->events
!= EPOLLIN
) {
1381 log_info("Got invalid event from epoll.");
1385 n
= read(s
->signal_fd
, &sfsi
, sizeof(sfsi
));
1386 if (n
!= sizeof(sfsi
)) {
1391 if (errno
== EINTR
|| errno
== EAGAIN
)
1397 if (sfsi
.ssi_signo
== SIGUSR1
) {
1398 server_flush_to_var(s
);
1402 log_debug("Received SIG%s", signal_to_string(sfsi
.ssi_signo
));
1405 } else if (ev
->data
.fd
== s
->native_fd
||
1406 ev
->data
.fd
== s
->syslog_fd
) {
1408 if (ev
->events
!= EPOLLIN
) {
1409 log_info("Got invalid event from epoll.");
1414 struct msghdr msghdr
;
1416 struct ucred
*ucred
= NULL
;
1417 struct timeval
*tv
= NULL
;
1418 struct cmsghdr
*cmsg
;
1420 struct cmsghdr cmsghdr
;
1421 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1422 CMSG_SPACE(sizeof(struct timeval
))];
1427 if (ioctl(ev
->data
.fd
, SIOCINQ
, &v
) < 0) {
1428 log_error("SIOCINQ failed: %m");
1435 if (s
->buffer_size
< (size_t) v
) {
1439 l
= MAX(LINE_MAX
+ (size_t) v
, s
->buffer_size
* 2);
1440 b
= realloc(s
->buffer
, l
+1);
1443 log_error("Couldn't increase buffer.");
1452 iovec
.iov_base
= s
->buffer
;
1453 iovec
.iov_len
= s
->buffer_size
;
1457 msghdr
.msg_iov
= &iovec
;
1458 msghdr
.msg_iovlen
= 1;
1459 msghdr
.msg_control
= &control
;
1460 msghdr
.msg_controllen
= sizeof(control
);
1462 n
= recvmsg(ev
->data
.fd
, &msghdr
, MSG_DONTWAIT
);
1465 if (errno
== EINTR
|| errno
== EAGAIN
)
1468 log_error("recvmsg() failed: %m");
1472 for (cmsg
= CMSG_FIRSTHDR(&msghdr
); cmsg
; cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
)) {
1474 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1475 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1476 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1477 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1478 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1479 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1480 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1481 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1484 if (ev
->data
.fd
== s
->syslog_fd
) {
1487 e
= memchr(s
->buffer
, '\n', n
);
1493 forward_syslog(s
, s
->buffer
, n
, ucred
, tv
);
1494 process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
);
1496 process_native_message(s
, s
->buffer
, n
, ucred
, tv
);
1501 } else if (ev
->data
.fd
== s
->stdout_fd
) {
1503 if (ev
->events
!= EPOLLIN
) {
1504 log_info("Got invalid event from epoll.");
1508 stdout_stream_new(s
);
1512 StdoutStream
*stream
;
1514 if ((ev
->events
|EPOLLIN
|EPOLLHUP
) != (EPOLLIN
|EPOLLHUP
)) {
1515 log_info("Got invalid event from epoll.");
1519 /* If it is none of the well-known fds, it must be an
1520 * stdout stream fd. Note that this is a bit ugly here
1521 * (since we rely that none of the well-known fds
1522 * could be interpreted as pointer), but nonetheless
1523 * safe, since the well-known fds would never get an
1524 * fd > 4096, i.e. beyond the first memory page */
1526 stream
= ev
->data
.ptr
;
1528 if (stdout_stream_process(stream
) <= 0)
1529 stdout_stream_free(stream
);
1534 log_error("Unknown event.");
1538 static int open_syslog_socket(Server
*s
) {
1539 union sockaddr_union sa
;
1541 struct epoll_event ev
;
1546 if (s
->syslog_fd
< 0) {
1548 s
->syslog_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0);
1549 if (s
->syslog_fd
< 0) {
1550 log_error("socket() failed: %m");
1555 sa
.un
.sun_family
= AF_UNIX
;
1556 strncpy(sa
.un
.sun_path
, "/dev/log", sizeof(sa
.un
.sun_path
));
1558 unlink(sa
.un
.sun_path
);
1560 r
= bind(s
->syslog_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1562 log_error("bind() failed: %m");
1566 chmod(sa
.un
.sun_path
, 0666);
1570 r
= setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_PASSCRED
, &one
, sizeof(one
));
1572 log_error("SO_PASSCRED failed: %m");
1577 r
= setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_TIMESTAMP
, &one
, sizeof(one
));
1579 log_error("SO_TIMESTAMP failed: %m");
1583 /* Since we use the same socket for forwarding this to some
1584 * other syslog implementation, make sure we don't hang
1586 timeval_store(&tv
, SYSLOG_TIMEOUT_USEC
);
1587 if (setsockopt(s
->syslog_fd
, SOL_SOCKET
, SO_SNDTIMEO
, &tv
, sizeof(tv
)) < 0) {
1588 log_error("SO_SNDTIMEO failed: %m");
1593 ev
.events
= EPOLLIN
;
1594 ev
.data
.fd
= s
->syslog_fd
;
1595 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->syslog_fd
, &ev
) < 0) {
1596 log_error("Failed to add syslog server fd to epoll object: %m");
1603 static int open_native_socket(Server
*s
) {
1604 union sockaddr_union sa
;
1606 struct epoll_event ev
;
1610 if (s
->native_fd
< 0) {
1612 s
->native_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0);
1613 if (s
->native_fd
< 0) {
1614 log_error("socket() failed: %m");
1619 sa
.un
.sun_family
= AF_UNIX
;
1620 strncpy(sa
.un
.sun_path
, "/run/systemd/journal", sizeof(sa
.un
.sun_path
));
1622 unlink(sa
.un
.sun_path
);
1624 r
= bind(s
->native_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1626 log_error("bind() failed: %m");
1630 chmod(sa
.un
.sun_path
, 0666);
1634 r
= setsockopt(s
->native_fd
, SOL_SOCKET
, SO_PASSCRED
, &one
, sizeof(one
));
1636 log_error("SO_PASSCRED failed: %m");
1641 r
= setsockopt(s
->native_fd
, SOL_SOCKET
, SO_TIMESTAMP
, &one
, sizeof(one
));
1643 log_error("SO_TIMESTAMP failed: %m");
1648 ev
.events
= EPOLLIN
;
1649 ev
.data
.fd
= s
->native_fd
;
1650 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->native_fd
, &ev
) < 0) {
1651 log_error("Failed to add native server fd to epoll object: %m");
1658 static int open_stdout_socket(Server
*s
) {
1659 union sockaddr_union sa
;
1661 struct epoll_event ev
;
1665 if (s
->stdout_fd
< 0) {
1667 s
->stdout_fd
= socket(AF_UNIX
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
1668 if (s
->stdout_fd
< 0) {
1669 log_error("socket() failed: %m");
1674 sa
.un
.sun_family
= AF_UNIX
;
1675 strncpy(sa
.un
.sun_path
, "/run/systemd/stdout", sizeof(sa
.un
.sun_path
));
1677 unlink(sa
.un
.sun_path
);
1679 r
= bind(s
->stdout_fd
, &sa
.sa
, offsetof(union sockaddr_union
, un
.sun_path
) + strlen(sa
.un
.sun_path
));
1681 log_error("bind() failed: %m");
1685 chmod(sa
.un
.sun_path
, 0666);
1687 if (listen(s
->stdout_fd
, SOMAXCONN
) < 0) {
1688 log_error("liste() failed: %m");
1694 ev
.events
= EPOLLIN
;
1695 ev
.data
.fd
= s
->stdout_fd
;
1696 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->stdout_fd
, &ev
) < 0) {
1697 log_error("Failed to add stdout server fd to epoll object: %m");
1704 static int open_signalfd(Server
*s
) {
1706 struct epoll_event ev
;
1710 assert_se(sigemptyset(&mask
) == 0);
1711 sigset_add_many(&mask
, SIGINT
, SIGTERM
, SIGUSR1
, -1);
1712 assert_se(sigprocmask(SIG_SETMASK
, &mask
, NULL
) == 0);
1714 s
->signal_fd
= signalfd(-1, &mask
, SFD_NONBLOCK
|SFD_CLOEXEC
);
1715 if (s
->signal_fd
< 0) {
1716 log_error("signalfd(): %m");
1721 ev
.events
= EPOLLIN
;
1722 ev
.data
.fd
= s
->signal_fd
;
1724 if (epoll_ctl(s
->epoll_fd
, EPOLL_CTL_ADD
, s
->signal_fd
, &ev
) < 0) {
1725 log_error("epoll_ctl(): %m");
1732 static int server_parse_config_file(Server
*s
) {
1739 fn
= "/etc/systemd/systemd-journald.conf";
1740 f
= fopen(fn
, "re");
1742 if (errno
== ENOENT
)
1745 log_warning("Failed to open configuration file %s: %m", fn
);
1749 r
= config_parse(fn
, f
, "Journal\0", config_item_perf_lookup
, (void*) journald_gperf_lookup
, false, s
);
1751 log_warning("Failed to parse configuration file: %s", strerror(-r
));
1758 static int server_init(Server
*s
) {
1764 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->signal_fd
= s
->epoll_fd
= -1;
1767 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1768 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1770 memset(&s
->system_metrics
, 0xFF, sizeof(s
->system_metrics
));
1771 memset(&s
->runtime_metrics
, 0xFF, sizeof(s
->runtime_metrics
));
1773 server_parse_config_file(s
);
1775 s
->user_journals
= hashmap_new(trivial_hash_func
, trivial_compare_func
);
1776 if (!s
->user_journals
) {
1777 log_error("Out of memory.");
1781 s
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
1782 if (s
->epoll_fd
< 0) {
1783 log_error("Failed to create epoll object: %m");
1787 n
= sd_listen_fds(true);
1789 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n
));
1793 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1795 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/native", 0) > 0) {
1797 if (s
->native_fd
>= 0) {
1798 log_error("Too many native sockets passed.");
1804 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/stdout", 0) > 0) {
1806 if (s
->stdout_fd
>= 0) {
1807 log_error("Too many stdout sockets passed.");
1813 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0) {
1815 if (s
->syslog_fd
>= 0) {
1816 log_error("Too many /dev/log sockets passed.");
1823 log_error("Unknown socket passed.");
1828 r
= open_syslog_socket(s
);
1832 r
= open_native_socket(s
);
1836 r
= open_stdout_socket(s
);
1840 r
= system_journal_open(s
);
1844 r
= open_signalfd(s
);
1848 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1855 static void server_done(Server
*s
) {
1859 while (s
->stdout_streams
)
1860 stdout_stream_free(s
->stdout_streams
);
1862 if (s
->system_journal
)
1863 journal_file_close(s
->system_journal
);
1865 if (s
->runtime_journal
)
1866 journal_file_close(s
->runtime_journal
);
1868 while ((f
= hashmap_steal_first(s
->user_journals
)))
1869 journal_file_close(f
);
1871 hashmap_free(s
->user_journals
);
1873 if (s
->epoll_fd
>= 0)
1874 close_nointr_nofail(s
->epoll_fd
);
1876 if (s
->signal_fd
>= 0)
1877 close_nointr_nofail(s
->signal_fd
);
1879 if (s
->syslog_fd
>= 0)
1880 close_nointr_nofail(s
->syslog_fd
);
1882 if (s
->native_fd
>= 0)
1883 close_nointr_nofail(s
->native_fd
);
1885 if (s
->stdout_fd
>= 0)
1886 close_nointr_nofail(s
->stdout_fd
);
1889 journal_rate_limit_free(s
->rate_limit
);
1894 int main(int argc
, char *argv
[]) {
1898 /* if (getppid() != 1) { */
1899 /* log_error("This program should be invoked by init only."); */
1900 /* return EXIT_FAILURE; */
1904 log_error("This program does not take arguments.");
1905 return EXIT_FAILURE
;
1908 log_set_target(LOG_TARGET_CONSOLE
);
1909 log_parse_environment();
1914 r
= server_init(&server
);
1918 server_vacuum(&server
);
1919 server_flush_to_var(&server
);
1921 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1925 "STATUS=Processing requests...");
1928 struct epoll_event event
;
1930 r
= epoll_wait(server
.epoll_fd
, &event
, 1, -1);
1936 log_error("epoll_wait() failed: %m");
1942 r
= process_event(&server
, &event
);
1949 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1953 "STATUS=Shutting down...");
1955 server_done(&server
);
1957 return r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;