]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald.c
journal: size journal data hash table based on maximum file size metrics
[thirdparty/systemd.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32
33 #include <systemd/sd-journal.h>
34 #include <systemd/sd-messages.h>
35 #include <systemd/sd-daemon.h>
36
37 #ifdef HAVE_LOGIND
38 #include <systemd/sd-login.h>
39 #endif
40
41 #include "mkdir.h"
42 #include "hashmap.h"
43 #include "journal-file.h"
44 #include "socket-util.h"
45 #include "cgroup-util.h"
46 #include "list.h"
47 #include "journal-rate-limit.h"
48 #include "journal-internal.h"
49 #include "conf-parser.h"
50 #include "journald.h"
51 #include "virt.h"
52 #include "missing.h"
53
54 #ifdef HAVE_ACL
55 #include <sys/acl.h>
56 #include <acl/libacl.h>
57 #include "acl-util.h"
58 #endif
59
60 #ifdef HAVE_SELINUX
61 #include <selinux/selinux.h>
62 #endif
63
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes one before opening another once this
 * many are open. */
#define USER_JOURNALS_MAX 1024

/* NOTE(review): presumably the cap on simultaneously connected stdout
 * streams -- the code enforcing it is not in this part of the file. */
#define STDOUT_STREAMS_MAX 4096

/* NOTE(review): presumably the defaults handed to the rate limiter when
 * nothing else is configured -- confirm against the server setup code. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* For how long available_space() keeps returning its cached result
 * before it measures the journal directory again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)

/* Number of iovec slots dispatch_message_real() may fill in on its own
 * (trusted "_"-prefixed fields); callers must leave this many free. */
#define N_IOVEC_META_FIELDS 17

/* 32 MiB. NOTE(review): presumably the largest entry accepted from a
 * client -- the check using it is not visible here. */
#define ENTRY_SIZE_MAX (32*1024*1024)
75
/* Position of a stdout stream connection within its protocol.
 * NOTE(review): judging by the names, one header item is consumed per
 * state, in this order, before the stream reaches RUNNING; the state
 * machine itself is not in this part of the file -- confirm there. */
typedef enum StdoutStreamState {
        STDOUT_STREAM_IDENTIFIER = 0,
        STDOUT_STREAM_UNIT_ID,
        STDOUT_STREAM_PRIORITY,
        STDOUT_STREAM_LEVEL_PREFIX,
        STDOUT_STREAM_FORWARD_TO_SYSLOG,
        STDOUT_STREAM_FORWARD_TO_KMSG,
        STDOUT_STREAM_FORWARD_TO_CONSOLE,
        STDOUT_STREAM_RUNNING,
} StdoutStreamState;
86
87 struct StdoutStream {
88 Server *server;
89 StdoutStreamState state;
90
91 int fd;
92
93 struct ucred ucred;
94 #ifdef HAVE_SELINUX
95 security_context_t security_context;
96 #endif
97
98 char *identifier;
99 char *unit_id;
100 int priority;
101 bool level_prefix:1;
102 bool forward_to_syslog:1;
103 bool forward_to_kmsg:1;
104 bool forward_to_console:1;
105
106 char buffer[LINE_MAX+1];
107 size_t length;
108
109 LIST_FIELDS(StdoutStream, stdout_stream);
110 };
111
112 static const char* const storage_table[] = {
113 [STORAGE_AUTO] = "auto",
114 [STORAGE_VOLATILE] = "volatile",
115 [STORAGE_PERSISTENT] = "persistent",
116 [STORAGE_NONE] = "none"
117 };
118
119 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
120 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
121
122 static uint64_t available_space(Server *s) {
123 char ids[33], *p;
124 const char *f;
125 sd_id128_t machine;
126 struct statvfs ss;
127 uint64_t sum = 0, avail = 0, ss_avail = 0;
128 int r;
129 DIR *d;
130 usec_t ts;
131 JournalMetrics *m;
132
133 ts = now(CLOCK_MONOTONIC);
134
135 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
136 return s->cached_available_space;
137
138 r = sd_id128_get_machine(&machine);
139 if (r < 0)
140 return 0;
141
142 if (s->system_journal) {
143 f = "/var/log/journal/";
144 m = &s->system_metrics;
145 } else {
146 f = "/run/log/journal/";
147 m = &s->runtime_metrics;
148 }
149
150 assert(m);
151
152 p = strappend(f, sd_id128_to_string(machine, ids));
153 if (!p)
154 return 0;
155
156 d = opendir(p);
157 free(p);
158
159 if (!d)
160 return 0;
161
162 if (fstatvfs(dirfd(d), &ss) < 0)
163 goto finish;
164
165 for (;;) {
166 struct stat st;
167 struct dirent buf, *de;
168
169 r = readdir_r(d, &buf, &de);
170 if (r != 0)
171 break;
172
173 if (!de)
174 break;
175
176 if (!endswith(de->d_name, ".journal") &&
177 !endswith(de->d_name, ".journal~"))
178 continue;
179
180 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
181 continue;
182
183 if (!S_ISREG(st.st_mode))
184 continue;
185
186 sum += (uint64_t) st.st_blocks * 512UL;
187 }
188
189 avail = sum >= m->max_use ? 0 : m->max_use - sum;
190
191 ss_avail = ss.f_bsize * ss.f_bavail;
192
193 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
194
195 if (ss_avail < avail)
196 avail = ss_avail;
197
198 s->cached_available_space = avail;
199 s->cached_available_space_timestamp = ts;
200
201 finish:
202 closedir(d);
203
204 return avail;
205 }
206
207 static void server_read_file_gid(Server *s) {
208 const char *adm = "adm";
209 int r;
210
211 assert(s);
212
213 if (s->file_gid_valid)
214 return;
215
216 r = get_group_creds(&adm, &s->file_gid);
217 if (r < 0)
218 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
219
220 /* if we couldn't read the gid, then it will be 0, but that's
221 * fine and we shouldn't try to resolve the group again, so
222 * let's just pretend it worked right-away. */
223 s->file_gid_valid = true;
224 }
225
226 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
227 int r;
228 #ifdef HAVE_ACL
229 acl_t acl;
230 acl_entry_t entry;
231 acl_permset_t permset;
232 #endif
233
234 assert(f);
235
236 server_read_file_gid(s);
237
238 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
239 if (r < 0)
240 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
241
242 #ifdef HAVE_ACL
243 if (uid <= 0)
244 return;
245
246 acl = acl_get_fd(f->fd);
247 if (!acl) {
248 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
249 return;
250 }
251
252 r = acl_find_uid(acl, uid, &entry);
253 if (r <= 0) {
254
255 if (acl_create_entry(&acl, &entry) < 0 ||
256 acl_set_tag_type(entry, ACL_USER) < 0 ||
257 acl_set_qualifier(entry, &uid) < 0) {
258 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
259 goto finish;
260 }
261 }
262
263 if (acl_get_permset(entry, &permset) < 0 ||
264 acl_add_perm(permset, ACL_READ) < 0 ||
265 acl_calc_mask(&acl) < 0) {
266 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
267 goto finish;
268 }
269
270 if (acl_set_fd(f->fd, acl) < 0)
271 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
272
273 finish:
274 acl_free(acl);
275 #endif
276 }
277
278 static JournalFile* find_journal(Server *s, uid_t uid) {
279 char *p;
280 int r;
281 JournalFile *f;
282 char ids[33];
283 sd_id128_t machine;
284
285 assert(s);
286
287 /* We split up user logs only on /var, not on /run. If the
288 * runtime file is open, we write to it exclusively, in order
289 * to guarantee proper order as soon as we flush /run to
290 * /var and close the runtime file. */
291
292 if (s->runtime_journal)
293 return s->runtime_journal;
294
295 if (uid <= 0)
296 return s->system_journal;
297
298 r = sd_id128_get_machine(&machine);
299 if (r < 0)
300 return s->system_journal;
301
302 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
303 if (f)
304 return f;
305
306 if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0)
307 return s->system_journal;
308
309 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
310 /* Too many open? Then let's close one */
311 f = hashmap_steal_first(s->user_journals);
312 assert(f);
313 journal_file_close(f);
314 }
315
316 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, &s->system_metrics, s->system_journal, &f);
317 free(p);
318
319 if (r < 0)
320 return s->system_journal;
321
322 server_fix_perms(s, f, uid);
323
324 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
325 if (r < 0) {
326 journal_file_close(f);
327 return s->system_journal;
328 }
329
330 return f;
331 }
332
333 static void server_rotate(Server *s) {
334 JournalFile *f;
335 void *k;
336 Iterator i;
337 int r;
338
339 log_info("Rotating...");
340
341 if (s->runtime_journal) {
342 r = journal_file_rotate(&s->runtime_journal);
343 if (r < 0)
344 if (s->runtime_journal)
345 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
346 else
347 log_error("Failed to create new runtime journal: %s", strerror(-r));
348 else
349 server_fix_perms(s, s->runtime_journal, 0);
350 }
351
352 if (s->system_journal) {
353 r = journal_file_rotate(&s->system_journal);
354 if (r < 0)
355 if (s->system_journal)
356 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
357 else
358 log_error("Failed to create new system journal: %s", strerror(-r));
359
360 else
361 server_fix_perms(s, s->system_journal, 0);
362 }
363
364 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
365 r = journal_file_rotate(&f);
366 if (r < 0)
367 if (f->path)
368 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
369 else
370 log_error("Failed to create user journal: %s", strerror(-r));
371 else {
372 hashmap_replace(s->user_journals, k, f);
373 server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
374 }
375 }
376 }
377
378 static void server_vacuum(Server *s) {
379 char *p;
380 char ids[33];
381 sd_id128_t machine;
382 int r;
383
384 log_info("Vacuuming...");
385
386 r = sd_id128_get_machine(&machine);
387 if (r < 0) {
388 log_error("Failed to get machine ID: %s", strerror(-r));
389 return;
390 }
391
392 sd_id128_to_string(machine, ids);
393
394 if (s->system_journal) {
395 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
396 log_error("Out of memory.");
397 return;
398 }
399
400 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
401 if (r < 0 && r != -ENOENT)
402 log_error("Failed to vacuum %s: %s", p, strerror(-r));
403 free(p);
404 }
405
406 if (s->runtime_journal) {
407 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
408 log_error("Out of memory.");
409 return;
410 }
411
412 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
413 if (r < 0 && r != -ENOENT)
414 log_error("Failed to vacuum %s: %s", p, strerror(-r));
415 free(p);
416 }
417
418 s->cached_available_space_timestamp = 0;
419 }
420
421 static char *shortened_cgroup_path(pid_t pid) {
422 int r;
423 char *process_path, *init_path, *path;
424
425 assert(pid > 0);
426
427 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
428 if (r < 0)
429 return NULL;
430
431 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
432 if (r < 0) {
433 free(process_path);
434 return NULL;
435 }
436
437 if (endswith(init_path, "/system"))
438 init_path[strlen(init_path) - 7] = 0;
439 else if (streq(init_path, "/"))
440 init_path[0] = 0;
441
442 if (startswith(process_path, init_path)) {
443 char *p;
444
445 p = strdup(process_path + strlen(init_path));
446 if (!p) {
447 free(process_path);
448 free(init_path);
449 return NULL;
450 }
451 path = p;
452 } else {
453 path = process_path;
454 process_path = NULL;
455 }
456
457 free(process_path);
458 free(init_path);
459
460 return path;
461 }
462
463 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
464 JournalFile *f;
465 bool vacuumed = false;
466 int r;
467
468 assert(s);
469 assert(iovec);
470 assert(n > 0);
471
472 f = find_journal(s, uid);
473 if (!f)
474 return;
475
476 if (journal_file_rotate_suggested(f)) {
477 log_info("Journal header limits reached or header out-of-date, rotating.");
478 server_rotate(s);
479 server_vacuum(s);
480 vacuumed = true;
481 }
482
483 for (;;) {
484 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
485 if (r >= 0)
486 return;
487
488 if (vacuumed ||
489 (r != -E2BIG && /* hit limit */
490 r != -EFBIG && /* hit fs limit */
491 r != -EDQUOT && /* quota hit */
492 r != -ENOSPC && /* disk full */
493 r != -EBADMSG && /* corrupted */
494 r != -ENODATA && /* truncated */
495 r != -EHOSTDOWN && /* other machine */
496 r != -EPROTONOSUPPORT && /* unsupported feature */
497 r != -EBUSY && /* unclean shutdown */
498 r != -ESHUTDOWN /* already archived */)) {
499 log_error("Failed to write entry, ignoring: %s", strerror(-r));
500 return;
501 }
502
503 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
504 log_info("Allocation limit reached, rotating.");
505 else if (r == -EHOSTDOWN)
506 log_info("Journal file from other machine, rotating.");
507 else if (r == -EBUSY)
508 log_info("Unlcean shutdown, rotating.");
509 else
510 log_warning("Journal file corrupted, rotating.");
511
512 server_rotate(s);
513 server_vacuum(s);
514 vacuumed = true;
515
516 log_info("Retrying write.");
517 }
518 }
519
520 static void dispatch_message_real(
521 Server *s,
522 struct iovec *iovec, unsigned n, unsigned m,
523 struct ucred *ucred,
524 struct timeval *tv,
525 const char *label, size_t label_len,
526 const char *unit_id) {
527
528 char *pid = NULL, *uid = NULL, *gid = NULL,
529 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
530 *comm = NULL, *cmdline = NULL, *hostname = NULL,
531 *audit_session = NULL, *audit_loginuid = NULL,
532 *exe = NULL, *cgroup = NULL, *session = NULL,
533 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
534
535 char idbuf[33];
536 sd_id128_t id;
537 int r;
538 char *t;
539 uid_t loginuid = 0, realuid = 0;
540
541 assert(s);
542 assert(iovec);
543 assert(n > 0);
544 assert(n + N_IOVEC_META_FIELDS <= m);
545
546 if (ucred) {
547 uint32_t audit;
548 #ifdef HAVE_LOGIND
549 uid_t owner;
550 #endif
551
552 realuid = ucred->uid;
553
554 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
555 IOVEC_SET_STRING(iovec[n++], pid);
556
557 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
558 IOVEC_SET_STRING(iovec[n++], uid);
559
560 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
561 IOVEC_SET_STRING(iovec[n++], gid);
562
563 r = get_process_comm(ucred->pid, &t);
564 if (r >= 0) {
565 comm = strappend("_COMM=", t);
566 free(t);
567
568 if (comm)
569 IOVEC_SET_STRING(iovec[n++], comm);
570 }
571
572 r = get_process_exe(ucred->pid, &t);
573 if (r >= 0) {
574 exe = strappend("_EXE=", t);
575 free(t);
576
577 if (exe)
578 IOVEC_SET_STRING(iovec[n++], exe);
579 }
580
581 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
582 if (r >= 0) {
583 cmdline = strappend("_CMDLINE=", t);
584 free(t);
585
586 if (cmdline)
587 IOVEC_SET_STRING(iovec[n++], cmdline);
588 }
589
590 r = audit_session_from_pid(ucred->pid, &audit);
591 if (r >= 0)
592 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
593 IOVEC_SET_STRING(iovec[n++], audit_session);
594
595 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
596 if (r >= 0)
597 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
598 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
599
600 t = shortened_cgroup_path(ucred->pid);
601 if (t) {
602 cgroup = strappend("_SYSTEMD_CGROUP=", t);
603 free(t);
604
605 if (cgroup)
606 IOVEC_SET_STRING(iovec[n++], cgroup);
607 }
608
609 #ifdef HAVE_LOGIND
610 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
611 session = strappend("_SYSTEMD_SESSION=", t);
612 free(t);
613
614 if (session)
615 IOVEC_SET_STRING(iovec[n++], session);
616 }
617
618 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
619 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
620 IOVEC_SET_STRING(iovec[n++], owner_uid);
621 #endif
622
623 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
624 unit = strappend("_SYSTEMD_UNIT=", t);
625 free(t);
626 } else if (unit_id)
627 unit = strappend("_SYSTEMD_UNIT=", unit_id);
628
629 if (unit)
630 IOVEC_SET_STRING(iovec[n++], unit);
631
632 #ifdef HAVE_SELINUX
633 if (label) {
634 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
635 if (selinux_context) {
636 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
637 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
638 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
639 IOVEC_SET_STRING(iovec[n++], selinux_context);
640 }
641 } else {
642 security_context_t con;
643
644 if (getpidcon(ucred->pid, &con) >= 0) {
645 selinux_context = strappend("_SELINUX_CONTEXT=", con);
646 if (selinux_context)
647 IOVEC_SET_STRING(iovec[n++], selinux_context);
648
649 freecon(con);
650 }
651 }
652 #endif
653 }
654
655 if (tv) {
656 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
657 (unsigned long long) timeval_load(tv)) >= 0)
658 IOVEC_SET_STRING(iovec[n++], source_time);
659 }
660
661 /* Note that strictly speaking storing the boot id here is
662 * redundant since the entry includes this in-line
663 * anyway. However, we need this indexed, too. */
664 r = sd_id128_get_boot(&id);
665 if (r >= 0)
666 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
667 IOVEC_SET_STRING(iovec[n++], boot_id);
668
669 r = sd_id128_get_machine(&id);
670 if (r >= 0)
671 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
672 IOVEC_SET_STRING(iovec[n++], machine_id);
673
674 t = gethostname_malloc();
675 if (t) {
676 hostname = strappend("_HOSTNAME=", t);
677 free(t);
678 if (hostname)
679 IOVEC_SET_STRING(iovec[n++], hostname);
680 }
681
682 assert(n <= m);
683
684 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
685
686 free(pid);
687 free(uid);
688 free(gid);
689 free(comm);
690 free(exe);
691 free(cmdline);
692 free(source_time);
693 free(boot_id);
694 free(machine_id);
695 free(hostname);
696 free(audit_session);
697 free(audit_loginuid);
698 free(cgroup);
699 free(session);
700 free(owner_uid);
701 free(unit);
702 free(selinux_context);
703 }
704
705 static void driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
706 char mid[11 + 32 + 1];
707 char buffer[16 + LINE_MAX + 1];
708 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
709 int n = 0;
710 va_list ap;
711 struct ucred ucred;
712
713 assert(s);
714 assert(format);
715
716 IOVEC_SET_STRING(iovec[n++], "PRIORITY=5");
717 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
718
719 memcpy(buffer, "MESSAGE=", 8);
720 va_start(ap, format);
721 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
722 va_end(ap);
723 char_array_0(buffer);
724 IOVEC_SET_STRING(iovec[n++], buffer);
725
726 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
727 char_array_0(mid);
728 IOVEC_SET_STRING(iovec[n++], mid);
729
730 zero(ucred);
731 ucred.pid = getpid();
732 ucred.uid = getuid();
733 ucred.gid = getgid();
734
735 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
736 }
737
738 static void dispatch_message(Server *s,
739 struct iovec *iovec, unsigned n, unsigned m,
740 struct ucred *ucred,
741 struct timeval *tv,
742 const char *label, size_t label_len,
743 const char *unit_id,
744 int priority) {
745 int rl;
746 char *path = NULL, *c;
747
748 assert(s);
749 assert(iovec || n == 0);
750
751 if (n == 0)
752 return;
753
754 if (LOG_PRI(priority) > s->max_level_store)
755 return;
756
757 if (!ucred)
758 goto finish;
759
760 path = shortened_cgroup_path(ucred->pid);
761 if (!path)
762 goto finish;
763
764 /* example: /user/lennart/3/foobar
765 * /system/dbus.service/foobar
766 *
767 * So let's cut of everything past the third /, since that is
768 * wher user directories start */
769
770 c = strchr(path, '/');
771 if (c) {
772 c = strchr(c+1, '/');
773 if (c) {
774 c = strchr(c+1, '/');
775 if (c)
776 *c = 0;
777 }
778 }
779
780 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
781
782 if (rl == 0) {
783 free(path);
784 return;
785 }
786
787 /* Write a suppression message if we suppressed something */
788 if (rl > 1)
789 driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
790
791 free(path);
792
793 finish:
794 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
795 }
796
797 static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, struct ucred *ucred, struct timeval *tv) {
798 struct msghdr msghdr;
799 struct cmsghdr *cmsg;
800 union {
801 struct cmsghdr cmsghdr;
802 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
803 } control;
804 union sockaddr_union sa;
805
806 assert(s);
807 assert(iovec);
808 assert(n_iovec > 0);
809
810 zero(msghdr);
811 msghdr.msg_iov = (struct iovec*) iovec;
812 msghdr.msg_iovlen = n_iovec;
813
814 zero(sa);
815 sa.un.sun_family = AF_UNIX;
816 strncpy(sa.un.sun_path, "/run/systemd/journal/syslog", sizeof(sa.un.sun_path));
817 msghdr.msg_name = &sa;
818 msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path);
819
820 if (ucred) {
821 zero(control);
822 msghdr.msg_control = &control;
823 msghdr.msg_controllen = sizeof(control);
824
825 cmsg = CMSG_FIRSTHDR(&msghdr);
826 cmsg->cmsg_level = SOL_SOCKET;
827 cmsg->cmsg_type = SCM_CREDENTIALS;
828 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
829 memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
830 msghdr.msg_controllen = cmsg->cmsg_len;
831 }
832
833 /* Forward the syslog message we received via /dev/log to
834 * /run/systemd/syslog. Unfortunately we currently can't set
835 * the SO_TIMESTAMP auxiliary data, and hence we don't. */
836
837 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
838 return;
839
840 /* The socket is full? I guess the syslog implementation is
841 * too slow, and we shouldn't wait for that... */
842 if (errno == EAGAIN)
843 return;
844
845 if (ucred && errno == ESRCH) {
846 struct ucred u;
847
848 /* Hmm, presumably the sender process vanished
849 * by now, so let's fix it as good as we
850 * can, and retry */
851
852 u = *ucred;
853 u.pid = getpid();
854 memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
855
856 if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
857 return;
858
859 if (errno == EAGAIN)
860 return;
861 }
862
863 log_debug("Failed to forward syslog message: %m");
864 }
865
866 static void forward_syslog_raw(Server *s, int priority, const char *buffer, struct ucred *ucred, struct timeval *tv) {
867 struct iovec iovec;
868
869 assert(s);
870 assert(buffer);
871
872 if (LOG_PRI(priority) > s->max_level_syslog)
873 return;
874
875 IOVEC_SET_STRING(iovec, buffer);
876 forward_syslog_iovec(s, &iovec, 1, ucred, tv);
877 }
878
879 static void forward_syslog(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred, struct timeval *tv) {
880 struct iovec iovec[5];
881 char header_priority[6], header_time[64], header_pid[16];
882 int n = 0;
883 time_t t;
884 struct tm *tm;
885 char *ident_buf = NULL;
886
887 assert(s);
888 assert(priority >= 0);
889 assert(priority <= 999);
890 assert(message);
891
892 if (LOG_PRI(priority) > s->max_level_syslog)
893 return;
894
895 /* First: priority field */
896 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
897 char_array_0(header_priority);
898 IOVEC_SET_STRING(iovec[n++], header_priority);
899
900 /* Second: timestamp */
901 t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
902 tm = localtime(&t);
903 if (!tm)
904 return;
905 if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
906 return;
907 IOVEC_SET_STRING(iovec[n++], header_time);
908
909 /* Third: identifier and PID */
910 if (ucred) {
911 if (!identifier) {
912 get_process_comm(ucred->pid, &ident_buf);
913 identifier = ident_buf;
914 }
915
916 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
917 char_array_0(header_pid);
918
919 if (identifier)
920 IOVEC_SET_STRING(iovec[n++], identifier);
921
922 IOVEC_SET_STRING(iovec[n++], header_pid);
923 } else if (identifier) {
924 IOVEC_SET_STRING(iovec[n++], identifier);
925 IOVEC_SET_STRING(iovec[n++], ": ");
926 }
927
928 /* Fourth: message */
929 IOVEC_SET_STRING(iovec[n++], message);
930
931 forward_syslog_iovec(s, iovec, n, ucred, tv);
932
933 free(ident_buf);
934 }
935
/* Makes sure a syslog priority value carries a facility: if the facility
 * bits are all zero (which denotes the kernel facility) the level is kept
 * and the facility is set to LOG_USER. Values that already have a
 * facility are returned unchanged. */
static int fixup_priority(int priority) {
        int level = priority & LOG_PRIMASK;
        int facility = priority & LOG_FACMASK;

        return facility != 0 ? priority : (level | LOG_USER);
}
943
944 static void forward_kmsg(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
945 struct iovec iovec[5];
946 char header_priority[6], header_pid[16];
947 int n = 0;
948 char *ident_buf = NULL;
949 int fd;
950
951 assert(s);
952 assert(priority >= 0);
953 assert(priority <= 999);
954 assert(message);
955
956 if (LOG_PRI(priority) > s->max_level_kmsg)
957 return;
958
959 /* Never allow messages with kernel facility to be written to
960 * kmsg, regardless where the data comes from. */
961 priority = fixup_priority(priority);
962
963 /* First: priority field */
964 snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
965 char_array_0(header_priority);
966 IOVEC_SET_STRING(iovec[n++], header_priority);
967
968 /* Second: identifier and PID */
969 if (ucred) {
970 if (!identifier) {
971 get_process_comm(ucred->pid, &ident_buf);
972 identifier = ident_buf;
973 }
974
975 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
976 char_array_0(header_pid);
977
978 if (identifier)
979 IOVEC_SET_STRING(iovec[n++], identifier);
980
981 IOVEC_SET_STRING(iovec[n++], header_pid);
982 } else if (identifier) {
983 IOVEC_SET_STRING(iovec[n++], identifier);
984 IOVEC_SET_STRING(iovec[n++], ": ");
985 }
986
987 /* Fourth: message */
988 IOVEC_SET_STRING(iovec[n++], message);
989 IOVEC_SET_STRING(iovec[n++], "\n");
990
991 fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
992 if (fd < 0) {
993 log_debug("Failed to open /dev/kmsg for logging: %s", strerror(errno));
994 goto finish;
995 }
996
997 if (writev(fd, iovec, n) < 0)
998 log_debug("Failed to write to /dev/kmsg for logging: %s", strerror(errno));
999
1000 close_nointr_nofail(fd);
1001
1002 finish:
1003 free(ident_buf);
1004 }
1005
1006 static void forward_console(Server *s, int priority, const char *identifier, const char *message, struct ucred *ucred) {
1007 struct iovec iovec[4];
1008 char header_pid[16];
1009 int n = 0, fd;
1010 char *ident_buf = NULL;
1011 const char *tty;
1012
1013 assert(s);
1014 assert(message);
1015
1016 if (LOG_PRI(priority) > s->max_level_console)
1017 return;
1018
1019 /* First: identifier and PID */
1020 if (ucred) {
1021 if (!identifier) {
1022 get_process_comm(ucred->pid, &ident_buf);
1023 identifier = ident_buf;
1024 }
1025
1026 snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
1027 char_array_0(header_pid);
1028
1029 if (identifier)
1030 IOVEC_SET_STRING(iovec[n++], identifier);
1031
1032 IOVEC_SET_STRING(iovec[n++], header_pid);
1033 } else if (identifier) {
1034 IOVEC_SET_STRING(iovec[n++], identifier);
1035 IOVEC_SET_STRING(iovec[n++], ": ");
1036 }
1037
1038 /* Third: message */
1039 IOVEC_SET_STRING(iovec[n++], message);
1040 IOVEC_SET_STRING(iovec[n++], "\n");
1041
1042 tty = s->tty_path ? s->tty_path : "/dev/console";
1043
1044 fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
1045 if (fd < 0) {
1046 log_debug("Failed to open %s for logging: %s", tty, strerror(errno));
1047 goto finish;
1048 }
1049
1050 if (writev(fd, iovec, n) < 0)
1051 log_debug("Failed to write to %s for logging: %s", tty, strerror(errno));
1052
1053 close_nointr_nofail(fd);
1054
1055 finish:
1056 free(ident_buf);
1057 }
1058
1059 static void read_identifier(const char **buf, char **identifier, char **pid) {
1060 const char *p;
1061 char *t;
1062 size_t l, e;
1063
1064 assert(buf);
1065 assert(identifier);
1066 assert(pid);
1067
1068 p = *buf;
1069
1070 p += strspn(p, WHITESPACE);
1071 l = strcspn(p, WHITESPACE);
1072
1073 if (l <= 0 ||
1074 p[l-1] != ':')
1075 return;
1076
1077 e = l;
1078 l--;
1079
1080 if (p[l-1] == ']') {
1081 size_t k = l-1;
1082
1083 for (;;) {
1084
1085 if (p[k] == '[') {
1086 t = strndup(p+k+1, l-k-2);
1087 if (t)
1088 *pid = t;
1089
1090 l = k;
1091 break;
1092 }
1093
1094 if (k == 0)
1095 break;
1096
1097 k--;
1098 }
1099 }
1100
1101 t = strndup(p, l);
1102 if (t)
1103 *identifier = t;
1104
1105 *buf = p + e;
1106 *buf += strspn(*buf, WHITESPACE);
1107 }
1108
1109 static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv, const char *label, size_t label_len) {
1110 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
1111 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
1112 unsigned n = 0;
1113 int priority = LOG_USER | LOG_INFO;
1114 char *identifier = NULL, *pid = NULL;
1115 const char *orig;
1116
1117 assert(s);
1118 assert(buf);
1119
1120 orig = buf;
1121 parse_syslog_priority((char**) &buf, &priority);
1122
1123 if (s->forward_to_syslog)
1124 forward_syslog_raw(s, priority, orig, ucred, tv);
1125
1126 skip_syslog_date((char**) &buf);
1127 read_identifier(&buf, &identifier, &pid);
1128
1129 if (s->forward_to_kmsg)
1130 forward_kmsg(s, priority, identifier, buf, ucred);
1131
1132 if (s->forward_to_console)
1133 forward_console(s, priority, identifier, buf, ucred);
1134
1135 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
1136
1137 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1138 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1139
1140 if (priority & LOG_FACMASK)
1141 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1142 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1143
1144 if (identifier) {
1145 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1146 if (syslog_identifier)
1147 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1148 }
1149
1150 if (pid) {
1151 syslog_pid = strappend("SYSLOG_PID=", pid);
1152 if (syslog_pid)
1153 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1154 }
1155
1156 message = strappend("MESSAGE=", buf);
1157 if (message)
1158 IOVEC_SET_STRING(iovec[n++], message);
1159
1160 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority);
1161
1162 free(message);
1163 free(identifier);
1164 free(pid);
1165 free(syslog_priority);
1166 free(syslog_facility);
1167 free(syslog_identifier);
1168 }
1169
/* Checks whether the l bytes at p are acceptable as the name of a field
 * supplied by a client.
 *
 * This roughly follows the POSIX syntax recommendations for environment
 * variable names, with a few extra restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it is 1 to 64 bytes long, consists only of A-Z,
 * 0-9 and '_', and starts with neither a digit nor an underscore (names
 * with a leading underscore are protected). */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        /* Neither empty nor overly long names */
        if (l == 0 || l > 64)
                return false;

        /* Protected name, or digit in first position */
        if (p[0] == '_' || (p[0] >= '0' && p[0] <= '9'))
                return false;

        for (i = 0; i < l; i++) {
                char ch = p[i];
                bool is_upper = ch >= 'A' && ch <= 'Z';
                bool is_digit = ch >= '0' && ch <= '9';

                if (!is_upper && !is_digit && ch != '_')
                        return false;
        }

        return true;
}
1204
1205 static void process_native_message(
1206 Server *s,
1207 const void *buffer, size_t buffer_size,
1208 struct ucred *ucred,
1209 struct timeval *tv,
1210 const char *label, size_t label_len) {
1211
1212 struct iovec *iovec = NULL;
1213 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
1214 const char *p;
1215 size_t remaining;
1216 int priority = LOG_INFO;
1217 char *identifier = NULL, *message = NULL;
1218
1219 assert(s);
1220 assert(buffer || buffer_size == 0);
1221
1222 p = buffer;
1223 remaining = buffer_size;
1224
1225 while (remaining > 0) {
1226 const char *e, *q;
1227
1228 e = memchr(p, '\n', remaining);
1229
1230 if (!e) {
1231 /* Trailing noise, let's ignore it, and flush what we collected */
1232 log_debug("Received message with trailing noise, ignoring.");
1233 break;
1234 }
1235
1236 if (e == p) {
1237 /* Entry separator */
1238 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1239 n = 0;
1240 priority = LOG_INFO;
1241
1242 p++;
1243 remaining--;
1244 continue;
1245 }
1246
1247 if (*p == '.' || *p == '#') {
1248 /* Ignore control commands for now, and
1249 * comments too. */
1250 remaining -= (e - p) + 1;
1251 p = e + 1;
1252 continue;
1253 }
1254
1255 /* A property follows */
1256
1257 if (n+N_IOVEC_META_FIELDS >= m) {
1258 struct iovec *c;
1259 unsigned u;
1260
1261 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
1262 c = realloc(iovec, u * sizeof(struct iovec));
1263 if (!c) {
1264 log_error("Out of memory");
1265 break;
1266 }
1267
1268 iovec = c;
1269 m = u;
1270 }
1271
1272 q = memchr(p, '=', e - p);
1273 if (q) {
1274 if (valid_user_field(p, q - p)) {
1275 size_t l;
1276
1277 l = e - p;
1278
1279 /* If the field name starts with an
1280 * underscore, skip the variable,
1281 * since that indidates a trusted
1282 * field */
1283 iovec[n].iov_base = (char*) p;
1284 iovec[n].iov_len = l;
1285 n++;
1286
1287 /* We need to determine the priority
1288 * of this entry for the rate limiting
1289 * logic */
1290 if (l == 10 &&
1291 memcmp(p, "PRIORITY=", 9) == 0 &&
1292 p[9] >= '0' && p[9] <= '9')
1293 priority = (priority & LOG_FACMASK) | (p[9] - '0');
1294
1295 else if (l == 17 &&
1296 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1297 p[16] >= '0' && p[16] <= '9')
1298 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
1299
1300 else if (l == 18 &&
1301 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
1302 p[16] >= '0' && p[16] <= '9' &&
1303 p[17] >= '0' && p[17] <= '9')
1304 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
1305
1306 else if (l >= 19 &&
1307 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
1308 char *t;
1309
1310 t = strndup(p + 18, l - 18);
1311 if (t) {
1312 free(identifier);
1313 identifier = t;
1314 }
1315 } else if (l >= 8 &&
1316 memcmp(p, "MESSAGE=", 8) == 0) {
1317 char *t;
1318
1319 t = strndup(p + 8, l - 8);
1320 if (t) {
1321 free(message);
1322 message = t;
1323 }
1324 }
1325 }
1326
1327 remaining -= (e - p) + 1;
1328 p = e + 1;
1329 continue;
1330 } else {
1331 le64_t l_le;
1332 uint64_t l;
1333 char *k;
1334
1335 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
1336 log_debug("Failed to parse message, ignoring.");
1337 break;
1338 }
1339
1340 memcpy(&l_le, e + 1, sizeof(uint64_t));
1341 l = le64toh(l_le);
1342
1343 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
1344 e[1+sizeof(uint64_t)+l] != '\n') {
1345 log_debug("Failed to parse message, ignoring.");
1346 break;
1347 }
1348
1349 k = malloc((e - p) + 1 + l);
1350 if (!k) {
1351 log_error("Out of memory");
1352 break;
1353 }
1354
1355 memcpy(k, p, e - p);
1356 k[e - p] = '=';
1357 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
1358
1359 if (valid_user_field(p, e - p)) {
1360 iovec[n].iov_base = k;
1361 iovec[n].iov_len = (e - p) + 1 + l;
1362 n++;
1363 } else
1364 free(k);
1365
1366 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
1367 p = e + 1 + sizeof(uint64_t) + l + 1;
1368 }
1369 }
1370
1371 if (n <= 0)
1372 goto finish;
1373
1374 tn = n++;
1375 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
1376
1377 if (message) {
1378 if (s->forward_to_syslog)
1379 forward_syslog(s, priority, identifier, message, ucred, tv);
1380
1381 if (s->forward_to_kmsg)
1382 forward_kmsg(s, priority, identifier, message, ucred);
1383
1384 if (s->forward_to_console)
1385 forward_console(s, priority, identifier, message, ucred);
1386 }
1387
1388 dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
1389
1390 finish:
1391 for (j = 0; j < n; j++) {
1392 if (j == tn)
1393 continue;
1394
1395 if (iovec[j].iov_base < buffer ||
1396 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1397 free(iovec[j].iov_base);
1398 }
1399
1400 free(iovec);
1401 free(identifier);
1402 free(message);
1403 }
1404
1405 static void process_native_file(
1406 Server *s,
1407 int fd,
1408 struct ucred *ucred,
1409 struct timeval *tv,
1410 const char *label, size_t label_len) {
1411
1412 struct stat st;
1413 void *p;
1414 ssize_t n;
1415
1416 assert(s);
1417 assert(fd >= 0);
1418
1419 /* Data is in the passed file, since it didn't fit in a
1420 * datagram. We can't map the file here, since clients might
1421 * then truncate it and trigger a SIGBUS for us. So let's
1422 * stupidly read it */
1423
1424 if (fstat(fd, &st) < 0) {
1425 log_error("Failed to stat passed file, ignoring: %m");
1426 return;
1427 }
1428
1429 if (!S_ISREG(st.st_mode)) {
1430 log_error("File passed is not regular. Ignoring.");
1431 return;
1432 }
1433
1434 if (st.st_size <= 0)
1435 return;
1436
1437 if (st.st_size > ENTRY_SIZE_MAX) {
1438 log_error("File passed too large. Ignoring.");
1439 return;
1440 }
1441
1442 p = malloc(st.st_size);
1443 if (!p) {
1444 log_error("Out of memory");
1445 return;
1446 }
1447
1448 n = pread(fd, p, st.st_size, 0);
1449 if (n < 0)
1450 log_error("Failed to read file, ignoring: %s", strerror(-n));
1451 else if (n > 0)
1452 process_native_message(s, p, n, ucred, tv, label, label_len);
1453
1454 free(p);
1455 }
1456
1457 static int stdout_stream_log(StdoutStream *s, const char *p) {
1458 struct iovec iovec[N_IOVEC_META_FIELDS + 5];
1459 char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *syslog_identifier = NULL;
1460 unsigned n = 0;
1461 int priority;
1462 char *label = NULL;
1463 size_t label_len = 0;
1464
1465 assert(s);
1466 assert(p);
1467
1468 if (isempty(p))
1469 return 0;
1470
1471 priority = s->priority;
1472
1473 if (s->level_prefix)
1474 parse_syslog_priority((char**) &p, &priority);
1475
1476 if (s->forward_to_syslog || s->server->forward_to_syslog)
1477 forward_syslog(s->server, fixup_priority(priority), s->identifier, p, &s->ucred, NULL);
1478
1479 if (s->forward_to_kmsg || s->server->forward_to_kmsg)
1480 forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
1481
1482 if (s->forward_to_console || s->server->forward_to_console)
1483 forward_console(s->server, priority, s->identifier, p, &s->ucred);
1484
1485 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
1486
1487 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1488 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1489
1490 if (priority & LOG_FACMASK)
1491 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1492 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1493
1494 if (s->identifier) {
1495 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
1496 if (syslog_identifier)
1497 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1498 }
1499
1500 message = strappend("MESSAGE=", p);
1501 if (message)
1502 IOVEC_SET_STRING(iovec[n++], message);
1503
1504 #ifdef HAVE_SELINUX
1505 if (s->security_context) {
1506 label = (char*) s->security_context;
1507 label_len = strlen((char*) s->security_context);
1508 }
1509 #endif
1510
1511 dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, label, label_len, s->unit_id, priority);
1512
1513 free(message);
1514 free(syslog_priority);
1515 free(syslog_facility);
1516 free(syslog_identifier);
1517
1518 return 0;
1519 }
1520
1521 static int stdout_stream_line(StdoutStream *s, char *p) {
1522 int r;
1523
1524 assert(s);
1525 assert(p);
1526
1527 p = strstrip(p);
1528
1529 switch (s->state) {
1530
1531 case STDOUT_STREAM_IDENTIFIER:
1532 if (isempty(p))
1533 s->identifier = NULL;
1534 else {
1535 s->identifier = strdup(p);
1536 if (!s->identifier) {
1537 log_error("Out of memory");
1538 return -ENOMEM;
1539 }
1540 }
1541
1542 s->state = STDOUT_STREAM_UNIT_ID;
1543 return 0;
1544
1545 case STDOUT_STREAM_UNIT_ID:
1546 if (s->ucred.uid == 0) {
1547 if (isempty(p))
1548 s->unit_id = NULL;
1549 else {
1550 s->unit_id = strdup(p);
1551 if (!s->unit_id) {
1552 log_error("Out of memory");
1553 return -ENOMEM;
1554 }
1555 }
1556 }
1557
1558 s->state = STDOUT_STREAM_PRIORITY;
1559 return 0;
1560
1561 case STDOUT_STREAM_PRIORITY:
1562 r = safe_atoi(p, &s->priority);
1563 if (r < 0 || s->priority <= 0 || s->priority >= 999) {
1564 log_warning("Failed to parse log priority line.");
1565 return -EINVAL;
1566 }
1567
1568 s->state = STDOUT_STREAM_LEVEL_PREFIX;
1569 return 0;
1570
1571 case STDOUT_STREAM_LEVEL_PREFIX:
1572 r = parse_boolean(p);
1573 if (r < 0) {
1574 log_warning("Failed to parse level prefix line.");
1575 return -EINVAL;
1576 }
1577
1578 s->level_prefix = !!r;
1579 s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
1580 return 0;
1581
1582 case STDOUT_STREAM_FORWARD_TO_SYSLOG:
1583 r = parse_boolean(p);
1584 if (r < 0) {
1585 log_warning("Failed to parse forward to syslog line.");
1586 return -EINVAL;
1587 }
1588
1589 s->forward_to_syslog = !!r;
1590 s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
1591 return 0;
1592
1593 case STDOUT_STREAM_FORWARD_TO_KMSG:
1594 r = parse_boolean(p);
1595 if (r < 0) {
1596 log_warning("Failed to parse copy to kmsg line.");
1597 return -EINVAL;
1598 }
1599
1600 s->forward_to_kmsg = !!r;
1601 s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
1602 return 0;
1603
1604 case STDOUT_STREAM_FORWARD_TO_CONSOLE:
1605 r = parse_boolean(p);
1606 if (r < 0) {
1607 log_warning("Failed to parse copy to console line.");
1608 return -EINVAL;
1609 }
1610
1611 s->forward_to_console = !!r;
1612 s->state = STDOUT_STREAM_RUNNING;
1613 return 0;
1614
1615 case STDOUT_STREAM_RUNNING:
1616 return stdout_stream_log(s, p);
1617 }
1618
1619 assert_not_reached("Unknown stream state");
1620 }
1621
1622 static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
1623 char *p;
1624 size_t remaining;
1625 int r;
1626
1627 assert(s);
1628
1629 p = s->buffer;
1630 remaining = s->length;
1631 for (;;) {
1632 char *end;
1633 size_t skip;
1634
1635 end = memchr(p, '\n', remaining);
1636 if (end)
1637 skip = end - p + 1;
1638 else if (remaining >= sizeof(s->buffer) - 1) {
1639 end = p + sizeof(s->buffer) - 1;
1640 skip = remaining;
1641 } else
1642 break;
1643
1644 *end = 0;
1645
1646 r = stdout_stream_line(s, p);
1647 if (r < 0)
1648 return r;
1649
1650 remaining -= skip;
1651 p += skip;
1652 }
1653
1654 if (force_flush && remaining > 0) {
1655 p[remaining] = 0;
1656 r = stdout_stream_line(s, p);
1657 if (r < 0)
1658 return r;
1659
1660 p += remaining;
1661 remaining = 0;
1662 }
1663
1664 if (p > s->buffer) {
1665 memmove(s->buffer, p, remaining);
1666 s->length = remaining;
1667 }
1668
1669 return 0;
1670 }
1671
1672 static int stdout_stream_process(StdoutStream *s) {
1673 ssize_t l;
1674 int r;
1675
1676 assert(s);
1677
1678 l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
1679 if (l < 0) {
1680
1681 if (errno == EAGAIN)
1682 return 0;
1683
1684 log_warning("Failed to read from stream: %m");
1685 return -errno;
1686 }
1687
1688 if (l == 0) {
1689 r = stdout_stream_scan(s, true);
1690 if (r < 0)
1691 return r;
1692
1693 return 0;
1694 }
1695
1696 s->length += l;
1697 r = stdout_stream_scan(s, false);
1698 if (r < 0)
1699 return r;
1700
1701 return 1;
1702
1703 }
1704
1705 static void stdout_stream_free(StdoutStream *s) {
1706 assert(s);
1707
1708 if (s->server) {
1709 assert(s->server->n_stdout_streams > 0);
1710 s->server->n_stdout_streams --;
1711 LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s);
1712 }
1713
1714 if (s->fd >= 0) {
1715 if (s->server)
1716 epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL);
1717
1718 close_nointr_nofail(s->fd);
1719 }
1720
1721 #ifdef HAVE_SELINUX
1722 if (s->security_context)
1723 freecon(s->security_context);
1724 #endif
1725
1726 free(s->identifier);
1727 free(s);
1728 }
1729
1730 static int stdout_stream_new(Server *s) {
1731 StdoutStream *stream;
1732 int fd, r;
1733 socklen_t len;
1734 struct epoll_event ev;
1735
1736 assert(s);
1737
1738 fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
1739 if (fd < 0) {
1740 if (errno == EAGAIN)
1741 return 0;
1742
1743 log_error("Failed to accept stdout connection: %m");
1744 return -errno;
1745 }
1746
1747 if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
1748 log_warning("Too many stdout streams, refusing connection.");
1749 close_nointr_nofail(fd);
1750 return 0;
1751 }
1752
1753 stream = new0(StdoutStream, 1);
1754 if (!stream) {
1755 log_error("Out of memory.");
1756 close_nointr_nofail(fd);
1757 return -ENOMEM;
1758 }
1759
1760 stream->fd = fd;
1761
1762 len = sizeof(stream->ucred);
1763 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) {
1764 log_error("Failed to determine peer credentials: %m");
1765 r = -errno;
1766 goto fail;
1767 }
1768
1769 #ifdef HAVE_SELINUX
1770 if (getpeercon(fd, &stream->security_context) < 0 && errno != ENOPROTOOPT)
1771 log_error("Failed to determine peer security context: %m");
1772 #endif
1773
1774 if (shutdown(fd, SHUT_WR) < 0) {
1775 log_error("Failed to shutdown writing side of socket: %m");
1776 r = -errno;
1777 goto fail;
1778 }
1779
1780 zero(ev);
1781 ev.data.ptr = stream;
1782 ev.events = EPOLLIN;
1783 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1784 log_error("Failed to add stream to event loop: %m");
1785 r = -errno;
1786 goto fail;
1787 }
1788
1789 stream->server = s;
1790 LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream);
1791 s->n_stdout_streams ++;
1792
1793 return 0;
1794
1795 fail:
1796 stdout_stream_free(stream);
1797 return r;
1798 }
1799
1800 static int parse_kernel_timestamp(char **_p, usec_t *t) {
1801 usec_t r;
1802 int k, i;
1803 char *p;
1804
1805 assert(_p);
1806 assert(*_p);
1807 assert(t);
1808
1809 p = *_p;
1810
1811 if (strlen(p) < 14 || p[0] != '[' || p[13] != ']' || p[6] != '.')
1812 return 0;
1813
1814 r = 0;
1815
1816 for (i = 1; i <= 5; i++) {
1817 r *= 10;
1818
1819 if (p[i] == ' ')
1820 continue;
1821
1822 k = undecchar(p[i]);
1823 if (k < 0)
1824 return 0;
1825
1826 r += k;
1827 }
1828
1829 for (i = 7; i <= 12; i++) {
1830 r *= 10;
1831
1832 k = undecchar(p[i]);
1833 if (k < 0)
1834 return 0;
1835
1836 r += k;
1837 }
1838
1839 *t = r;
1840 *_p += 14;
1841 *_p += strspn(*_p, WHITESPACE);
1842
1843 return 1;
1844 }
1845
/* Returns true if the string pid parses as a PID and that PID is the one of
 * the calling process, false otherwise. */
static bool is_us(const char *pid) {
        pid_t parsed;

        assert(pid);

        return parse_pid(pid, &parsed) >= 0 && parsed == getpid();
}
1856
1857 static void proc_kmsg_line(Server *s, const char *p) {
1858 struct iovec iovec[N_IOVEC_META_FIELDS + 7];
1859 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
1860 int priority = LOG_KERN | LOG_INFO;
1861 unsigned n = 0;
1862 usec_t usec;
1863 char *identifier = NULL, *pid = NULL;
1864
1865 assert(s);
1866 assert(p);
1867
1868 if (isempty(p))
1869 return;
1870
1871 parse_syslog_priority((char **) &p, &priority);
1872
1873 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
1874 return;
1875
1876 if (parse_kernel_timestamp((char **) &p, &usec) > 0) {
1877 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu",
1878 (unsigned long long) usec) >= 0)
1879 IOVEC_SET_STRING(iovec[n++], source_time);
1880 }
1881
1882 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
1883
1884 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
1885 IOVEC_SET_STRING(iovec[n++], syslog_priority);
1886
1887 if ((priority & LOG_FACMASK) == LOG_KERN) {
1888
1889 if (s->forward_to_syslog)
1890 forward_syslog(s, priority, "kernel", p, NULL, NULL);
1891
1892 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
1893 } else {
1894 read_identifier(&p, &identifier, &pid);
1895
1896 /* Avoid any messages we generated ourselves via
1897 * log_info() and friends. */
1898 if (pid && is_us(pid))
1899 goto finish;
1900
1901 if (s->forward_to_syslog)
1902 forward_syslog(s, priority, identifier, p, NULL, NULL);
1903
1904 if (identifier) {
1905 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
1906 if (syslog_identifier)
1907 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
1908 }
1909
1910 if (pid) {
1911 syslog_pid = strappend("SYSLOG_PID=", pid);
1912 if (syslog_pid)
1913 IOVEC_SET_STRING(iovec[n++], syslog_pid);
1914 }
1915
1916 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
1917 IOVEC_SET_STRING(iovec[n++], syslog_facility);
1918 }
1919
1920 message = strappend("MESSAGE=", p);
1921 if (message)
1922 IOVEC_SET_STRING(iovec[n++], message);
1923
1924 dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority);
1925
1926 finish:
1927 free(message);
1928 free(syslog_priority);
1929 free(syslog_identifier);
1930 free(syslog_pid);
1931 free(syslog_facility);
1932 free(source_time);
1933 free(identifier);
1934 free(pid);
1935 }
1936
1937 static void proc_kmsg_scan(Server *s) {
1938 char *p;
1939 size_t remaining;
1940
1941 assert(s);
1942
1943 p = s->proc_kmsg_buffer;
1944 remaining = s->proc_kmsg_length;
1945 for (;;) {
1946 char *end;
1947 size_t skip;
1948
1949 end = memchr(p, '\n', remaining);
1950 if (end)
1951 skip = end - p + 1;
1952 else if (remaining >= sizeof(s->proc_kmsg_buffer) - 1) {
1953 end = p + sizeof(s->proc_kmsg_buffer) - 1;
1954 skip = remaining;
1955 } else
1956 break;
1957
1958 *end = 0;
1959
1960 proc_kmsg_line(s, p);
1961
1962 remaining -= skip;
1963 p += skip;
1964 }
1965
1966 if (p > s->proc_kmsg_buffer) {
1967 memmove(s->proc_kmsg_buffer, p, remaining);
1968 s->proc_kmsg_length = remaining;
1969 }
1970 }
1971
/* Opens the persistent and/or the runtime system journal, as far as they are
 * not open yet.
 *
 * The persistent journal (/var/log/journal/<machine id>/system.journal) is
 * only attempted when storage is persistent or auto and the flag file
 * /run/systemd/journal/flushed exists. Failing to open it is not fatal.
 *
 * The runtime journal (/run/log/journal/<machine id>/system.journal) is
 * handled for every storage mode but "none": if a persistent journal is
 * open, it is only opened when it already exists; otherwise it is created
 * and failure to do so is fatal.
 *
 * Returns a negative errno-style value on failure. Otherwise returns the
 * last non-negative result still held in r (the callee's return value or 0
 * after an ignored error). */
static int system_journal_open(Server *s) {
        int r;
        char *fn;
        sd_id128_t machine;
        char ids[33];

        r = sd_id128_get_machine(&machine);
        if (r < 0)
                return r;

        /* The machine id in string form names the journal directory */
        sd_id128_to_string(machine, ids);

        if (!s->system_journal &&
            (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
            access("/run/systemd/journal/flushed", F_OK) >= 0) {

                /* If in auto mode: first try to create the machine
                 * path, but not the prefix.
                 *
                 * If in persistent mode: create /var/log/journal and
                 * the machine path */

                if (s->storage == STORAGE_PERSISTENT)
                        (void) mkdir("/var/log/journal/", 0755);

                fn = strappend("/var/log/journal/", ids);
                if (!fn)
                        return -ENOMEM;

                /* Result ignored on purpose: if the directory cannot be
                 * created, opening the journal below fails instead */
                (void) mkdir(fn, 0755);
                free(fn);

                fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
                if (!fn)
                        return -ENOMEM;

                r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->system_metrics, NULL, &s->system_journal);
                free(fn);

                if (r >= 0) {
                        s->system_journal->compress = s->compress;

                        server_fix_perms(s, s->system_journal, 0);
                } else if (r < 0) {

                        /* A missing directory or a read-only file system is
                         * not worth a warning; any other error is logged.
                         * Either way we carry on without a persistent
                         * journal. */
                        if (r != -ENOENT && r != -EROFS)
                                log_warning("Failed to open system journal: %s", strerror(-r));

                        r = 0;
                }
        }

        if (!s->runtime_journal &&
            (s->storage != STORAGE_NONE)) {

                fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
                if (!fn)
                        return -ENOMEM;

                if (s->system_journal) {

                        /* Try to open the runtime journal, but only
                         * if it already exists, so that we can flush
                         * it into the system journal */

                        r = journal_file_open(fn, O_RDWR, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
                        free(fn);

                        if (r < 0) {
                                /* Not existing is the expected case here */
                                if (r != -ENOENT)
                                        log_warning("Failed to open runtime journal: %s", strerror(-r));

                                r = 0;
                        }

                } else {

                        /* OK, we really need the runtime journal, so create
                         * it if necessary. */

                        (void) mkdir_parents(fn, 0755);
                        r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, &s->runtime_metrics, NULL, &s->runtime_journal);
                        free(fn);

                        if (r < 0) {
                                log_error("Failed to open runtime journal: %s", strerror(-r));
                                return r;
                        }
                }

                if (s->runtime_journal) {
                        s->runtime_journal->compress = s->compress;

                        server_fix_perms(s, s->runtime_journal, 0);
                }
        }

        return r;
}
2071
2072 static int server_flush_to_var(Server *s) {
2073 Object *o = NULL;
2074 int r;
2075 sd_id128_t machine;
2076 sd_journal *j;
2077
2078 assert(s);
2079
2080 if (s->storage != STORAGE_AUTO &&
2081 s->storage != STORAGE_PERSISTENT)
2082 return 0;
2083
2084 if (!s->runtime_journal)
2085 return 0;
2086
2087 system_journal_open(s);
2088
2089 if (!s->system_journal)
2090 return 0;
2091
2092 log_info("Flushing to /var...");
2093
2094 r = sd_id128_get_machine(&machine);
2095 if (r < 0) {
2096 log_error("Failed to get machine id: %s", strerror(-r));
2097 return r;
2098 }
2099
2100 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
2101 if (r < 0) {
2102 log_error("Failed to read runtime journal: %s", strerror(-r));
2103 return r;
2104 }
2105
2106 SD_JOURNAL_FOREACH(j) {
2107 JournalFile *f;
2108
2109 f = j->current_file;
2110 assert(f && f->current_offset > 0);
2111
2112 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2113 if (r < 0) {
2114 log_error("Can't read entry: %s", strerror(-r));
2115 goto finish;
2116 }
2117
2118 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2119 if (r == -E2BIG) {
2120 log_info("Allocation limit reached.");
2121
2122 journal_file_post_change(s->system_journal);
2123 server_rotate(s);
2124 server_vacuum(s);
2125
2126 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
2127 }
2128
2129 if (r < 0) {
2130 log_error("Can't write entry: %s", strerror(-r));
2131 goto finish;
2132 }
2133 }
2134
2135 finish:
2136 journal_file_post_change(s->system_journal);
2137
2138 journal_file_close(s->runtime_journal);
2139 s->runtime_journal = NULL;
2140
2141 if (r >= 0) {
2142 char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2143 sd_id128_to_string(machine, path + 17);
2144 rm_rf(path, false, true, false);
2145 }
2146
2147 return r;
2148 }
2149
2150 static int server_read_proc_kmsg(Server *s) {
2151 ssize_t l;
2152 assert(s);
2153 assert(s->proc_kmsg_fd >= 0);
2154
2155 l = read(s->proc_kmsg_fd, s->proc_kmsg_buffer + s->proc_kmsg_length, sizeof(s->proc_kmsg_buffer) - 1 - s->proc_kmsg_length);
2156 if (l == 0) /* the kernel is stupid and in some race
2157 * conditions returns 0 in the middle of the
2158 * stream. */
2159 return 0;
2160 if (l < 0) {
2161
2162 if (errno == EAGAIN || errno == EINTR)
2163 return 0;
2164
2165 log_error("Failed to read from kernel: %m");
2166 return -errno;
2167 }
2168
2169 s->proc_kmsg_length += l;
2170
2171 proc_kmsg_scan(s);
2172 return 1;
2173 }
2174
2175 static int server_flush_proc_kmsg(Server *s) {
2176 int r;
2177
2178 assert(s);
2179
2180 if (s->proc_kmsg_fd < 0)
2181 return 0;
2182
2183 log_info("Flushing /proc/kmsg...");
2184
2185 for (;;) {
2186 r = server_read_proc_kmsg(s);
2187 if (r < 0)
2188 return r;
2189
2190 if (r == 0)
2191 break;
2192 }
2193
2194 return 0;
2195 }
2196
/* Handles one event reported by epoll.
 *
 * The event data is compared against the server's well-known file
 * descriptors (signal fd, /proc/kmsg, native socket, syslog socket, stdout
 * listening socket). Anything else is taken to be a pointer to a
 * StdoutStream.
 *
 * Returns a negative errno-style value on failure, 0 after a signal other
 * than SIGUSR1/SIGUSR2 was received, and 1 in all other cases.
 * NOTE(review): how the caller interprets 0 vs. 1 is not visible here. */
static int process_event(Server *s, struct epoll_event *ev) {
        assert(s);
        assert(ev);

        if (ev->data.fd == s->signal_fd) {
                struct signalfd_siginfo sfsi;
                ssize_t n;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                n = read(s->signal_fd, &sfsi, sizeof(sfsi));
                if (n != sizeof(sfsi)) {

                        /* Short read */
                        if (n >= 0)
                                return -EIO;

                        if (errno == EINTR || errno == EAGAIN)
                                return 1;

                        return -errno;
                }

                /* SIGUSR1: mark as flushed and move the runtime journal to /var */
                if (sfsi.ssi_signo == SIGUSR1) {
                        touch("/run/systemd/journal/flushed");
                        server_flush_to_var(s);
                        return 1;
                }

                /* SIGUSR2: rotate and vacuum the journal files */
                if (sfsi.ssi_signo == SIGUSR2) {
                        server_rotate(s);
                        server_vacuum(s);
                        return 1;
                }

                log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo));
                return 0;

        } else if (ev->data.fd == s->proc_kmsg_fd) {
                int r;

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                r = server_read_proc_kmsg(s);
                if (r < 0)
                        return r;

                return 1;

        } else if (ev->data.fd == s->native_fd ||
                   ev->data.fd == s->syslog_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* Receive datagrams until the socket would block */
                for (;;) {
                        struct msghdr msghdr;
                        struct iovec iovec;
                        struct ucred *ucred = NULL;
                        struct timeval *tv = NULL;
                        struct cmsghdr *cmsg;
                        char *label = NULL;
                        size_t label_len = 0;
                        union {
                                struct cmsghdr cmsghdr;

                                /* We use NAME_MAX space for the
                                 * SELinux label here. The kernel
                                 * currently enforces no limit, but
                                 * according to suggestions from the
                                 * SELinux people this will change and
                                 * it will probably be identical to
                                 * NAME_MAX. For now we use that, but
                                 * this should be updated one day when
                                 * the final limit is known.*/
                                uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
                                            CMSG_SPACE(sizeof(struct timeval)) +
                                            CMSG_SPACE(sizeof(int)) + /* fd */
                                            CMSG_SPACE(NAME_MAX)]; /* selinux label */
                        } control;
                        ssize_t n;
                        int v;
                        int *fds = NULL;
                        unsigned n_fds = 0;

                        /* Ask for the amount of pending data, to size the
                         * receive buffer */
                        if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
                                log_error("SIOCINQ failed: %m");
                                return -errno;
                        }

                        if (s->buffer_size < (size_t) v) {
                                void *b;
                                size_t l;

                                /* Grow generously; one byte more than
                                 * buffer_size is allocated so that a NUL
                                 * terminator can be appended below */
                                l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
                                b = realloc(s->buffer, l+1);

                                if (!b) {
                                        log_error("Couldn't increase buffer.");
                                        return -ENOMEM;
                                }

                                s->buffer_size = l;
                                s->buffer = b;
                        }

                        zero(iovec);
                        iovec.iov_base = s->buffer;
                        iovec.iov_len = s->buffer_size;

                        zero(control);
                        zero(msghdr);
                        msghdr.msg_iov = &iovec;
                        msghdr.msg_iovlen = 1;
                        msghdr.msg_control = &control;
                        msghdr.msg_controllen = sizeof(control);

                        n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
                        if (n < 0) {

                                /* Queue drained, done with this event */
                                if (errno == EINTR || errno == EAGAIN)
                                        return 1;

                                log_error("recvmsg() failed: %m");
                                return -errno;
                        }

                        /* Pick up credentials, security label, timestamp and
                         * passed file descriptors from the control data */
                        for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {

                                if (cmsg->cmsg_level == SOL_SOCKET &&
                                    cmsg->cmsg_type == SCM_CREDENTIALS &&
                                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
                                        ucred = (struct ucred*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_SECURITY) {
                                        label = (char*) CMSG_DATA(cmsg);
                                        label_len = cmsg->cmsg_len - CMSG_LEN(0);
                                } else if (cmsg->cmsg_level == SOL_SOCKET &&
                                           cmsg->cmsg_type == SO_TIMESTAMP &&
                                           cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
                                        tv = (struct timeval*) CMSG_DATA(cmsg);
                                else if (cmsg->cmsg_level == SOL_SOCKET &&
                                         cmsg->cmsg_type == SCM_RIGHTS) {
                                        fds = (int*) CMSG_DATA(cmsg);
                                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
                                }
                        }

                        if (ev->data.fd == s->syslog_fd) {
                                char *e;

                                if (n > 0 && n_fds == 0) {
                                        /* Only the first line of the datagram
                                         * is used: cut at the first newline,
                                         * or terminate after the last byte */
                                        e = memchr(s->buffer, '\n', n);
                                        if (e)
                                                *e = 0;
                                        else
                                                s->buffer[n] = 0;

                                        process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
                                } else if (n_fds > 0)
                                        log_warning("Got file descriptors via syslog socket. Ignoring.");

                        } else {
                                /* Native protocol: either inline payload, or an
                                 * empty datagram carrying exactly one fd that
                                 * refers to a file with the payload */
                                if (n > 0 && n_fds == 0)
                                        process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
                                else if (n == 0 && n_fds == 1)
                                        process_native_file(s, fds[0], ucred, tv, label, label_len);
                                else if (n_fds > 0)
                                        log_warning("Got too many file descriptors via native socket. Ignoring.");
                        }

                        /* Received descriptors are always closed again */
                        close_many(fds, n_fds);
                }

                return 1;

        } else if (ev->data.fd == s->stdout_fd) {

                if (ev->events != EPOLLIN) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* The result is ignored: a failed accept is not fatal */
                stdout_stream_new(s);
                return 1;

        } else {
                StdoutStream *stream;

                /* Only EPOLLIN and EPOLLHUP are acceptable here */
                if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
                        log_info("Got invalid event from epoll.");
                        return -EIO;
                }

                /* If it is none of the well-known fds, it must be an
                 * stdout stream fd. Note that this is a bit ugly here
                 * (since we rely that none of the well-known fds
                 * could be interpreted as pointer), but nonetheless
                 * safe, since the well-known fds would never get an
                 * fd > 4096, i.e. beyond the first memory page */

                /* NOTE(review): the checks above compare ev->data.fd, an int
                 * member of the same union as data.ptr. Where pointers are
                 * wider than int only part of the pointer takes part in the
                 * comparison - confirm the argument above still holds. */

                stream = ev->data.ptr;

                /* Both errors and EOF (0) end the stream */
                if (stdout_stream_process(stream) <= 0)
                        stdout_stream_free(stream);

                return 1;
        }

        /* Not reached: every branch above returns */
        log_error("Unknown event.");
        return 0;
}
2416
2417 static int open_syslog_socket(Server *s) {
2418 union sockaddr_union sa;
2419 int one, r;
2420 struct epoll_event ev;
2421
2422 assert(s);
2423
2424 if (s->syslog_fd < 0) {
2425
2426 s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2427 if (s->syslog_fd < 0) {
2428 log_error("socket() failed: %m");
2429 return -errno;
2430 }
2431
2432 zero(sa);
2433 sa.un.sun_family = AF_UNIX;
2434 strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path));
2435
2436 unlink(sa.un.sun_path);
2437
2438 r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2439 if (r < 0) {
2440 log_error("bind() failed: %m");
2441 return -errno;
2442 }
2443
2444 chmod(sa.un.sun_path, 0666);
2445 } else
2446 fd_nonblock(s->syslog_fd, 1);
2447
2448 one = 1;
2449 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2450 if (r < 0) {
2451 log_error("SO_PASSCRED failed: %m");
2452 return -errno;
2453 }
2454
2455 #ifdef HAVE_SELINUX
2456 one = 1;
2457 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2458 if (r < 0)
2459 log_warning("SO_PASSSEC failed: %m");
2460 #endif
2461
2462 one = 1;
2463 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2464 if (r < 0) {
2465 log_error("SO_TIMESTAMP failed: %m");
2466 return -errno;
2467 }
2468
2469 zero(ev);
2470 ev.events = EPOLLIN;
2471 ev.data.fd = s->syslog_fd;
2472 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) {
2473 log_error("Failed to add syslog server fd to epoll object: %m");
2474 return -errno;
2475 }
2476
2477 return 0;
2478 }
2479
2480 static int open_native_socket(Server*s) {
2481 union sockaddr_union sa;
2482 int one, r;
2483 struct epoll_event ev;
2484
2485 assert(s);
2486
2487 if (s->native_fd < 0) {
2488
2489 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2490 if (s->native_fd < 0) {
2491 log_error("socket() failed: %m");
2492 return -errno;
2493 }
2494
2495 zero(sa);
2496 sa.un.sun_family = AF_UNIX;
2497 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
2498
2499 unlink(sa.un.sun_path);
2500
2501 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2502 if (r < 0) {
2503 log_error("bind() failed: %m");
2504 return -errno;
2505 }
2506
2507 chmod(sa.un.sun_path, 0666);
2508 } else
2509 fd_nonblock(s->native_fd, 1);
2510
2511 one = 1;
2512 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
2513 if (r < 0) {
2514 log_error("SO_PASSCRED failed: %m");
2515 return -errno;
2516 }
2517
2518 #ifdef HAVE_SELINUX
2519 one = 1;
2520 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
2521 if (r < 0)
2522 log_warning("SO_PASSSEC failed: %m");
2523 #endif
2524
2525 one = 1;
2526 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
2527 if (r < 0) {
2528 log_error("SO_TIMESTAMP failed: %m");
2529 return -errno;
2530 }
2531
2532 zero(ev);
2533 ev.events = EPOLLIN;
2534 ev.data.fd = s->native_fd;
2535 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
2536 log_error("Failed to add native server fd to epoll object: %m");
2537 return -errno;
2538 }
2539
2540 return 0;
2541 }
2542
2543 static int open_stdout_socket(Server *s) {
2544 union sockaddr_union sa;
2545 int r;
2546 struct epoll_event ev;
2547
2548 assert(s);
2549
2550 if (s->stdout_fd < 0) {
2551
2552 s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2553 if (s->stdout_fd < 0) {
2554 log_error("socket() failed: %m");
2555 return -errno;
2556 }
2557
2558 zero(sa);
2559 sa.un.sun_family = AF_UNIX;
2560 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
2561
2562 unlink(sa.un.sun_path);
2563
2564 r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
2565 if (r < 0) {
2566 log_error("bind() failed: %m");
2567 return -errno;
2568 }
2569
2570 chmod(sa.un.sun_path, 0666);
2571
2572 if (listen(s->stdout_fd, SOMAXCONN) < 0) {
2573 log_error("liste() failed: %m");
2574 return -errno;
2575 }
2576 } else
2577 fd_nonblock(s->stdout_fd, 1);
2578
2579 zero(ev);
2580 ev.events = EPOLLIN;
2581 ev.data.fd = s->stdout_fd;
2582 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) {
2583 log_error("Failed to add stdout server fd to epoll object: %m");
2584 return -errno;
2585 }
2586
2587 return 0;
2588 }
2589
2590 static int open_proc_kmsg(Server *s) {
2591 struct epoll_event ev;
2592
2593 assert(s);
2594
2595 if (!s->import_proc_kmsg)
2596 return 0;
2597
2598 s->proc_kmsg_fd = open("/proc/kmsg", O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2599 if (s->proc_kmsg_fd < 0) {
2600 log_warning("Failed to open /proc/kmsg, ignoring: %m");
2601 return 0;
2602 }
2603
2604 zero(ev);
2605 ev.events = EPOLLIN;
2606 ev.data.fd = s->proc_kmsg_fd;
2607 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->proc_kmsg_fd, &ev) < 0) {
2608 log_error("Failed to add /proc/kmsg fd to epoll object: %m");
2609 return -errno;
2610 }
2611
2612 return 0;
2613 }
2614
2615 static int open_signalfd(Server *s) {
2616 sigset_t mask;
2617 struct epoll_event ev;
2618
2619 assert(s);
2620
2621 assert_se(sigemptyset(&mask) == 0);
2622 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
2623 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2624
2625 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
2626 if (s->signal_fd < 0) {
2627 log_error("signalfd(): %m");
2628 return -errno;
2629 }
2630
2631 zero(ev);
2632 ev.events = EPOLLIN;
2633 ev.data.fd = s->signal_fd;
2634
2635 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
2636 log_error("epoll_ctl(): %m");
2637 return -errno;
2638 }
2639
2640 return 0;
2641 }
2642
2643 static int server_parse_proc_cmdline(Server *s) {
2644 char *line, *w, *state;
2645 int r;
2646 size_t l;
2647
2648 if (detect_container(NULL) > 0)
2649 return 0;
2650
2651 r = read_one_line_file("/proc/cmdline", &line);
2652 if (r < 0) {
2653 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
2654 return 0;
2655 }
2656
2657 FOREACH_WORD_QUOTED(w, l, line, state) {
2658 char *word;
2659
2660 word = strndup(w, l);
2661 if (!word) {
2662 r = -ENOMEM;
2663 goto finish;
2664 }
2665
2666 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
2667 r = parse_boolean(word + 35);
2668 if (r < 0)
2669 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
2670 else
2671 s->forward_to_syslog = r;
2672 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
2673 r = parse_boolean(word + 33);
2674 if (r < 0)
2675 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
2676 else
2677 s->forward_to_kmsg = r;
2678 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
2679 r = parse_boolean(word + 36);
2680 if (r < 0)
2681 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
2682 else
2683 s->forward_to_console = r;
2684 } else if (startswith(word, "systemd.journald"))
2685 log_warning("Invalid systemd.journald parameter. Ignoring.");
2686
2687 free(word);
2688 }
2689
2690 r = 0;
2691
2692 finish:
2693 free(line);
2694 return r;
2695 }
2696
2697 static int server_parse_config_file(Server *s) {
2698 FILE *f;
2699 const char *fn;
2700 int r;
2701
2702 assert(s);
2703
2704 fn = "/etc/systemd/journald.conf";
2705 f = fopen(fn, "re");
2706 if (!f) {
2707 if (errno == ENOENT)
2708 return 0;
2709
2710 log_warning("Failed to open configuration file %s: %m", fn);
2711 return -errno;
2712 }
2713
2714 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
2715 if (r < 0)
2716 log_warning("Failed to parse configuration file: %s", strerror(-r));
2717
2718 fclose(f);
2719
2720 return r;
2721 }
2722
2723 static int server_init(Server *s) {
2724 int n, r, fd;
2725
2726 assert(s);
2727
2728 zero(*s);
2729 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->proc_kmsg_fd = -1;
2730 s->compress = true;
2731
2732 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
2733 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
2734
2735 s->forward_to_syslog = true;
2736
2737 s->max_level_store = LOG_DEBUG;
2738 s->max_level_syslog = LOG_DEBUG;
2739 s->max_level_kmsg = LOG_NOTICE;
2740 s->max_level_console = LOG_INFO;
2741
2742 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
2743 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
2744
2745 server_parse_config_file(s);
2746 server_parse_proc_cmdline(s);
2747
2748 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
2749 if (!s->user_journals) {
2750 log_error("Out of memory.");
2751 return -ENOMEM;
2752 }
2753
2754 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2755 if (s->epoll_fd < 0) {
2756 log_error("Failed to create epoll object: %m");
2757 return -errno;
2758 }
2759
2760 n = sd_listen_fds(true);
2761 if (n < 0) {
2762 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
2763 return n;
2764 }
2765
2766 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2767
2768 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
2769
2770 if (s->native_fd >= 0) {
2771 log_error("Too many native sockets passed.");
2772 return -EINVAL;
2773 }
2774
2775 s->native_fd = fd;
2776
2777 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
2778
2779 if (s->stdout_fd >= 0) {
2780 log_error("Too many stdout sockets passed.");
2781 return -EINVAL;
2782 }
2783
2784 s->stdout_fd = fd;
2785
2786 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
2787
2788 if (s->syslog_fd >= 0) {
2789 log_error("Too many /dev/log sockets passed.");
2790 return -EINVAL;
2791 }
2792
2793 s->syslog_fd = fd;
2794
2795 } else {
2796 log_error("Unknown socket passed.");
2797 return -EINVAL;
2798 }
2799 }
2800
2801 r = open_syslog_socket(s);
2802 if (r < 0)
2803 return r;
2804
2805 r = open_native_socket(s);
2806 if (r < 0)
2807 return r;
2808
2809 r = open_stdout_socket(s);
2810 if (r < 0)
2811 return r;
2812
2813 r = open_proc_kmsg(s);
2814 if (r < 0)
2815 return r;
2816
2817 r = open_signalfd(s);
2818 if (r < 0)
2819 return r;
2820
2821 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2822 if (!s->rate_limit)
2823 return -ENOMEM;
2824
2825 r = system_journal_open(s);
2826 if (r < 0)
2827 return r;
2828
2829 return 0;
2830 }
2831
2832 static void server_done(Server *s) {
2833 JournalFile *f;
2834 assert(s);
2835
2836 while (s->stdout_streams)
2837 stdout_stream_free(s->stdout_streams);
2838
2839 if (s->system_journal)
2840 journal_file_close(s->system_journal);
2841
2842 if (s->runtime_journal)
2843 journal_file_close(s->runtime_journal);
2844
2845 while ((f = hashmap_steal_first(s->user_journals)))
2846 journal_file_close(f);
2847
2848 hashmap_free(s->user_journals);
2849
2850 if (s->epoll_fd >= 0)
2851 close_nointr_nofail(s->epoll_fd);
2852
2853 if (s->signal_fd >= 0)
2854 close_nointr_nofail(s->signal_fd);
2855
2856 if (s->syslog_fd >= 0)
2857 close_nointr_nofail(s->syslog_fd);
2858
2859 if (s->native_fd >= 0)
2860 close_nointr_nofail(s->native_fd);
2861
2862 if (s->stdout_fd >= 0)
2863 close_nointr_nofail(s->stdout_fd);
2864
2865 if (s->proc_kmsg_fd >= 0)
2866 close_nointr_nofail(s->proc_kmsg_fd);
2867
2868 if (s->rate_limit)
2869 journal_rate_limit_free(s->rate_limit);
2870
2871 free(s->buffer);
2872 free(s->tty_path);
2873 }
2874
2875 int main(int argc, char *argv[]) {
2876 Server server;
2877 int r;
2878
2879 /* if (getppid() != 1) { */
2880 /* log_error("This program should be invoked by init only."); */
2881 /* return EXIT_FAILURE; */
2882 /* } */
2883
2884 if (argc > 1) {
2885 log_error("This program does not take arguments.");
2886 return EXIT_FAILURE;
2887 }
2888
2889 log_set_target(LOG_TARGET_SAFE);
2890 log_set_facility(LOG_SYSLOG);
2891 log_parse_environment();
2892 log_open();
2893
2894 umask(0022);
2895
2896 r = server_init(&server);
2897 if (r < 0)
2898 goto finish;
2899
2900 server_vacuum(&server);
2901 server_flush_to_var(&server);
2902 server_flush_proc_kmsg(&server);
2903
2904 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
2905 driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
2906
2907 sd_notify(false,
2908 "READY=1\n"
2909 "STATUS=Processing requests...");
2910
2911 for (;;) {
2912 struct epoll_event event;
2913
2914 r = epoll_wait(server.epoll_fd, &event, 1, -1);
2915 if (r < 0) {
2916
2917 if (errno == EINTR)
2918 continue;
2919
2920 log_error("epoll_wait() failed: %m");
2921 r = -errno;
2922 goto finish;
2923 } else if (r == 0)
2924 break;
2925
2926 r = process_event(&server, &event);
2927 if (r < 0)
2928 goto finish;
2929 else if (r == 0)
2930 break;
2931 }
2932
2933 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
2934 driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
2935
2936 finish:
2937 sd_notify(false,
2938 "STATUS=Shutting down...");
2939
2940 server_done(&server);
2941
2942 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
2943 }