]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald.c
journald: split console transport stuff into its own file
[thirdparty/systemd.git] / src / journal / journald.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/socket.h>
24 #include <errno.h>
25 #include <sys/signalfd.h>
26 #include <unistd.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/ioctl.h>
30 #include <linux/sockios.h>
31 #include <sys/statvfs.h>
32 #include <sys/mman.h>
33
34 #include <libudev.h>
35 #include <systemd/sd-journal.h>
36 #include <systemd/sd-messages.h>
37 #include <systemd/sd-daemon.h>
38
39 #ifdef HAVE_LOGIND
40 #include <systemd/sd-login.h>
41 #endif
42
43 #include "mkdir.h"
44 #include "hashmap.h"
45 #include "journal-file.h"
46 #include "socket-util.h"
47 #include "cgroup-util.h"
48 #include "list.h"
49 #include "virt.h"
50 #include "missing.h"
51 #include "conf-parser.h"
52 #include "journal-rate-limit.h"
53 #include "journal-internal.h"
54 #include "journal-vacuum.h"
55 #include "journal-authenticate.h"
56 #include "journald.h"
57 #include "journald-kmsg.h"
58 #include "journald-syslog.h"
59 #include "journald-stream.h"
60 #include "journald-console.h"
61
62 #ifdef HAVE_ACL
63 #include <sys/acl.h>
64 #include <acl/libacl.h>
65 #include "acl-util.h"
66 #endif
67
68 #ifdef HAVE_SELINUX
69 #include <selinux/selinux.h>
70 #endif
71
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes already-open ones once the table is this big. */
#define USER_JOURNALS_MAX 1024

/* Default rate limiting parameters. NOTE(review): the consumer is not in this
 * part of the file; presumably applied when the configuration sets none. */
#define DEFAULT_RATE_LIMIT_INTERVAL (10 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 200

/* For how long available_space() trusts its cached result */
#define RECHECK_AVAILABLE_SPACE_USEC (30 * USEC_PER_SEC)

/* Largest entry (32 MiB) accepted from a passed file, see process_native_file() */
#define ENTRY_SIZE_MAX (32 * 1024 * 1024)
80
81 static const char* const storage_table[] = {
82 [STORAGE_AUTO] = "auto",
83 [STORAGE_VOLATILE] = "volatile",
84 [STORAGE_PERSISTENT] = "persistent",
85 [STORAGE_NONE] = "none"
86 };
87
88 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
89 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
90
91 static uint64_t available_space(Server *s) {
92 char ids[33], *p;
93 const char *f;
94 sd_id128_t machine;
95 struct statvfs ss;
96 uint64_t sum = 0, avail = 0, ss_avail = 0;
97 int r;
98 DIR *d;
99 usec_t ts;
100 JournalMetrics *m;
101
102 ts = now(CLOCK_MONOTONIC);
103
104 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts)
105 return s->cached_available_space;
106
107 r = sd_id128_get_machine(&machine);
108 if (r < 0)
109 return 0;
110
111 if (s->system_journal) {
112 f = "/var/log/journal/";
113 m = &s->system_metrics;
114 } else {
115 f = "/run/log/journal/";
116 m = &s->runtime_metrics;
117 }
118
119 assert(m);
120
121 p = strappend(f, sd_id128_to_string(machine, ids));
122 if (!p)
123 return 0;
124
125 d = opendir(p);
126 free(p);
127
128 if (!d)
129 return 0;
130
131 if (fstatvfs(dirfd(d), &ss) < 0)
132 goto finish;
133
134 for (;;) {
135 struct stat st;
136 struct dirent buf, *de;
137
138 r = readdir_r(d, &buf, &de);
139 if (r != 0)
140 break;
141
142 if (!de)
143 break;
144
145 if (!endswith(de->d_name, ".journal") &&
146 !endswith(de->d_name, ".journal~"))
147 continue;
148
149 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
150 continue;
151
152 if (!S_ISREG(st.st_mode))
153 continue;
154
155 sum += (uint64_t) st.st_blocks * 512UL;
156 }
157
158 avail = sum >= m->max_use ? 0 : m->max_use - sum;
159
160 ss_avail = ss.f_bsize * ss.f_bavail;
161
162 ss_avail = ss_avail < m->keep_free ? 0 : ss_avail - m->keep_free;
163
164 if (ss_avail < avail)
165 avail = ss_avail;
166
167 s->cached_available_space = avail;
168 s->cached_available_space_timestamp = ts;
169
170 finish:
171 closedir(d);
172
173 return avail;
174 }
175
176 static void server_read_file_gid(Server *s) {
177 const char *adm = "adm";
178 int r;
179
180 assert(s);
181
182 if (s->file_gid_valid)
183 return;
184
185 r = get_group_creds(&adm, &s->file_gid);
186 if (r < 0)
187 log_warning("Failed to resolve 'adm' group: %s", strerror(-r));
188
189 /* if we couldn't read the gid, then it will be 0, but that's
190 * fine and we shouldn't try to resolve the group again, so
191 * let's just pretend it worked right-away. */
192 s->file_gid_valid = true;
193 }
194
195 static void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196 int r;
197 #ifdef HAVE_ACL
198 acl_t acl;
199 acl_entry_t entry;
200 acl_permset_t permset;
201 #endif
202
203 assert(f);
204
205 server_read_file_gid(s);
206
207 r = fchmod_and_fchown(f->fd, 0640, 0, s->file_gid);
208 if (r < 0)
209 log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r));
210
211 #ifdef HAVE_ACL
212 if (uid <= 0)
213 return;
214
215 acl = acl_get_fd(f->fd);
216 if (!acl) {
217 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
218 return;
219 }
220
221 r = acl_find_uid(acl, uid, &entry);
222 if (r <= 0) {
223
224 if (acl_create_entry(&acl, &entry) < 0 ||
225 acl_set_tag_type(entry, ACL_USER) < 0 ||
226 acl_set_qualifier(entry, &uid) < 0) {
227 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
228 goto finish;
229 }
230 }
231
232 if (acl_get_permset(entry, &permset) < 0 ||
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 acl_calc_mask(&acl) < 0) {
235 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
236 goto finish;
237 }
238
239 if (acl_set_fd(f->fd, acl) < 0)
240 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
241
242 finish:
243 acl_free(acl);
244 #endif
245 }
246
247 static JournalFile* find_journal(Server *s, uid_t uid) {
248 char *p;
249 int r;
250 JournalFile *f;
251 sd_id128_t machine;
252
253 assert(s);
254
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
259
260 if (s->runtime_journal)
261 return s->runtime_journal;
262
263 if (uid <= 0)
264 return s->system_journal;
265
266 r = sd_id128_get_machine(&machine);
267 if (r < 0)
268 return s->system_journal;
269
270 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
271 if (f)
272 return f;
273
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
275 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
276 return s->system_journal;
277
278 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
279 /* Too many open? Then let's close one */
280 f = hashmap_steal_first(s->user_journals);
281 assert(f);
282 journal_file_close(f);
283 }
284
285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, s->system_journal, &f);
286 free(p);
287
288 if (r < 0)
289 return s->system_journal;
290
291 server_fix_perms(s, f, uid);
292
293 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
294 if (r < 0) {
295 journal_file_close(f);
296 return s->system_journal;
297 }
298
299 return f;
300 }
301
302 static void server_rotate(Server *s) {
303 JournalFile *f;
304 void *k;
305 Iterator i;
306 int r;
307
308 log_info("Rotating...");
309
310 if (s->runtime_journal) {
311 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
312 if (r < 0)
313 if (s->runtime_journal)
314 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
315 else
316 log_error("Failed to create new runtime journal: %s", strerror(-r));
317 else
318 server_fix_perms(s, s->runtime_journal, 0);
319 }
320
321 if (s->system_journal) {
322 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
323 if (r < 0)
324 if (s->system_journal)
325 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
326 else
327 log_error("Failed to create new system journal: %s", strerror(-r));
328
329 else
330 server_fix_perms(s, s->system_journal, 0);
331 }
332
333 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
334 r = journal_file_rotate(&f, s->compress, s->seal);
335 if (r < 0)
336 if (f->path)
337 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
338 else
339 log_error("Failed to create user journal: %s", strerror(-r));
340 else {
341 hashmap_replace(s->user_journals, k, f);
342 server_fix_perms(s, s->system_journal, PTR_TO_UINT32(k));
343 }
344 }
345 }
346
347 static void server_vacuum(Server *s) {
348 char *p;
349 char ids[33];
350 sd_id128_t machine;
351 int r;
352
353 log_info("Vacuuming...");
354
355 r = sd_id128_get_machine(&machine);
356 if (r < 0) {
357 log_error("Failed to get machine ID: %s", strerror(-r));
358 return;
359 }
360
361 sd_id128_to_string(machine, ids);
362
363 if (s->system_journal) {
364 if (asprintf(&p, "/var/log/journal/%s", ids) < 0) {
365 log_oom();
366 return;
367 }
368
369 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free);
370 if (r < 0 && r != -ENOENT)
371 log_error("Failed to vacuum %s: %s", p, strerror(-r));
372 free(p);
373 }
374
375 if (s->runtime_journal) {
376 if (asprintf(&p, "/run/log/journal/%s", ids) < 0) {
377 log_oom();
378 return;
379 }
380
381 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free);
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
384 free(p);
385 }
386
387 s->cached_available_space_timestamp = 0;
388 }
389
390 static char *shortened_cgroup_path(pid_t pid) {
391 int r;
392 char *process_path, *init_path, *path;
393
394 assert(pid > 0);
395
396 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
397 if (r < 0)
398 return NULL;
399
400 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
401 if (r < 0) {
402 free(process_path);
403 return NULL;
404 }
405
406 if (endswith(init_path, "/system"))
407 init_path[strlen(init_path) - 7] = 0;
408 else if (streq(init_path, "/"))
409 init_path[0] = 0;
410
411 if (startswith(process_path, init_path)) {
412 char *p;
413
414 p = strdup(process_path + strlen(init_path));
415 if (!p) {
416 free(process_path);
417 free(init_path);
418 return NULL;
419 }
420 path = p;
421 } else {
422 path = process_path;
423 process_path = NULL;
424 }
425
426 free(process_path);
427 free(init_path);
428
429 return path;
430 }
431
432 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
433 JournalFile *f;
434 bool vacuumed = false;
435 int r;
436
437 assert(s);
438 assert(iovec);
439 assert(n > 0);
440
441 f = find_journal(s, uid);
442 if (!f)
443 return;
444
445 if (journal_file_rotate_suggested(f)) {
446 log_info("Journal header limits reached or header out-of-date, rotating.");
447 server_rotate(s);
448 server_vacuum(s);
449 vacuumed = true;
450
451 f = find_journal(s, uid);
452 if (!f)
453 return;
454 }
455
456 for (;;) {
457 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
458 if (r >= 0)
459 return;
460
461 if (vacuumed ||
462 (r != -E2BIG && /* hit limit */
463 r != -EFBIG && /* hit fs limit */
464 r != -EDQUOT && /* quota hit */
465 r != -ENOSPC && /* disk full */
466 r != -EBADMSG && /* corrupted */
467 r != -ENODATA && /* truncated */
468 r != -EHOSTDOWN && /* other machine */
469 r != -EPROTONOSUPPORT && /* unsupported feature */
470 r != -EBUSY && /* unclean shutdown */
471 r != -ESHUTDOWN /* already archived */)) {
472 log_error("Failed to write entry, ignoring: %s", strerror(-r));
473 return;
474 }
475
476 if (r == -E2BIG || r == -EFBIG || r == EDQUOT || r == ENOSPC)
477 log_info("Allocation limit reached, rotating.");
478 else if (r == -EHOSTDOWN)
479 log_info("Journal file from other machine, rotating.");
480 else if (r == -EBUSY)
481 log_info("Unlcean shutdown, rotating.");
482 else
483 log_warning("Journal file corrupted, rotating.");
484
485 server_rotate(s);
486 server_vacuum(s);
487 vacuumed = true;
488
489 f = find_journal(s, uid);
490 if (!f)
491 return;
492
493 log_info("Retrying write.");
494 }
495 }
496
497 static void dispatch_message_real(
498 Server *s,
499 struct iovec *iovec, unsigned n, unsigned m,
500 struct ucred *ucred,
501 struct timeval *tv,
502 const char *label, size_t label_len,
503 const char *unit_id) {
504
505 char *pid = NULL, *uid = NULL, *gid = NULL,
506 *source_time = NULL, *boot_id = NULL, *machine_id = NULL,
507 *comm = NULL, *cmdline = NULL, *hostname = NULL,
508 *audit_session = NULL, *audit_loginuid = NULL,
509 *exe = NULL, *cgroup = NULL, *session = NULL,
510 *owner_uid = NULL, *unit = NULL, *selinux_context = NULL;
511
512 char idbuf[33];
513 sd_id128_t id;
514 int r;
515 char *t;
516 uid_t loginuid = 0, realuid = 0;
517
518 assert(s);
519 assert(iovec);
520 assert(n > 0);
521 assert(n + N_IOVEC_META_FIELDS <= m);
522
523 if (ucred) {
524 uint32_t audit;
525 #ifdef HAVE_LOGIND
526 uid_t owner;
527 #endif
528
529 realuid = ucred->uid;
530
531 if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0)
532 IOVEC_SET_STRING(iovec[n++], pid);
533
534 if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0)
535 IOVEC_SET_STRING(iovec[n++], uid);
536
537 if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0)
538 IOVEC_SET_STRING(iovec[n++], gid);
539
540 r = get_process_comm(ucred->pid, &t);
541 if (r >= 0) {
542 comm = strappend("_COMM=", t);
543 free(t);
544
545 if (comm)
546 IOVEC_SET_STRING(iovec[n++], comm);
547 }
548
549 r = get_process_exe(ucred->pid, &t);
550 if (r >= 0) {
551 exe = strappend("_EXE=", t);
552 free(t);
553
554 if (exe)
555 IOVEC_SET_STRING(iovec[n++], exe);
556 }
557
558 r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t);
559 if (r >= 0) {
560 cmdline = strappend("_CMDLINE=", t);
561 free(t);
562
563 if (cmdline)
564 IOVEC_SET_STRING(iovec[n++], cmdline);
565 }
566
567 r = audit_session_from_pid(ucred->pid, &audit);
568 if (r >= 0)
569 if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit) >= 0)
570 IOVEC_SET_STRING(iovec[n++], audit_session);
571
572 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
573 if (r >= 0)
574 if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0)
575 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
576
577 t = shortened_cgroup_path(ucred->pid);
578 if (t) {
579 cgroup = strappend("_SYSTEMD_CGROUP=", t);
580 free(t);
581
582 if (cgroup)
583 IOVEC_SET_STRING(iovec[n++], cgroup);
584 }
585
586 #ifdef HAVE_LOGIND
587 if (sd_pid_get_session(ucred->pid, &t) >= 0) {
588 session = strappend("_SYSTEMD_SESSION=", t);
589 free(t);
590
591 if (session)
592 IOVEC_SET_STRING(iovec[n++], session);
593 }
594
595 if (sd_pid_get_owner_uid(ucred->uid, &owner) >= 0)
596 if (asprintf(&owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner) >= 0)
597 IOVEC_SET_STRING(iovec[n++], owner_uid);
598 #endif
599
600 if (cg_pid_get_unit(ucred->pid, &t) >= 0) {
601 unit = strappend("_SYSTEMD_UNIT=", t);
602 free(t);
603 } else if (unit_id)
604 unit = strappend("_SYSTEMD_UNIT=", unit_id);
605
606 if (unit)
607 IOVEC_SET_STRING(iovec[n++], unit);
608
609 #ifdef HAVE_SELINUX
610 if (label) {
611 selinux_context = malloc(sizeof("_SELINUX_CONTEXT=") + label_len);
612 if (selinux_context) {
613 memcpy(selinux_context, "_SELINUX_CONTEXT=", sizeof("_SELINUX_CONTEXT=")-1);
614 memcpy(selinux_context+sizeof("_SELINUX_CONTEXT=")-1, label, label_len);
615 selinux_context[sizeof("_SELINUX_CONTEXT=")-1+label_len] = 0;
616 IOVEC_SET_STRING(iovec[n++], selinux_context);
617 }
618 } else {
619 security_context_t con;
620
621 if (getpidcon(ucred->pid, &con) >= 0) {
622 selinux_context = strappend("_SELINUX_CONTEXT=", con);
623 if (selinux_context)
624 IOVEC_SET_STRING(iovec[n++], selinux_context);
625
626 freecon(con);
627 }
628 }
629 #endif
630 }
631
632 if (tv) {
633 if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu",
634 (unsigned long long) timeval_load(tv)) >= 0)
635 IOVEC_SET_STRING(iovec[n++], source_time);
636 }
637
638 /* Note that strictly speaking storing the boot id here is
639 * redundant since the entry includes this in-line
640 * anyway. However, we need this indexed, too. */
641 r = sd_id128_get_boot(&id);
642 if (r >= 0)
643 if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
644 IOVEC_SET_STRING(iovec[n++], boot_id);
645
646 r = sd_id128_get_machine(&id);
647 if (r >= 0)
648 if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0)
649 IOVEC_SET_STRING(iovec[n++], machine_id);
650
651 t = gethostname_malloc();
652 if (t) {
653 hostname = strappend("_HOSTNAME=", t);
654 free(t);
655 if (hostname)
656 IOVEC_SET_STRING(iovec[n++], hostname);
657 }
658
659 assert(n <= m);
660
661 write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
662
663 free(pid);
664 free(uid);
665 free(gid);
666 free(comm);
667 free(exe);
668 free(cmdline);
669 free(source_time);
670 free(boot_id);
671 free(machine_id);
672 free(hostname);
673 free(audit_session);
674 free(audit_loginuid);
675 free(cgroup);
676 free(session);
677 free(owner_uid);
678 free(unit);
679 free(selinux_context);
680 }
681
682 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
683 char mid[11 + 32 + 1];
684 char buffer[16 + LINE_MAX + 1];
685 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
686 int n = 0;
687 va_list ap;
688 struct ucred ucred;
689
690 assert(s);
691 assert(format);
692
693 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
694 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
695
696 memcpy(buffer, "MESSAGE=", 8);
697 va_start(ap, format);
698 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
699 va_end(ap);
700 char_array_0(buffer);
701 IOVEC_SET_STRING(iovec[n++], buffer);
702
703 snprintf(mid, sizeof(mid), "MESSAGE_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(message_id));
704 char_array_0(mid);
705 IOVEC_SET_STRING(iovec[n++], mid);
706
707 zero(ucred);
708 ucred.pid = getpid();
709 ucred.uid = getuid();
710 ucred.gid = getgid();
711
712 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL);
713 }
714
715 void server_dispatch_message(
716 Server *s,
717 struct iovec *iovec, unsigned n, unsigned m,
718 struct ucred *ucred,
719 struct timeval *tv,
720 const char *label, size_t label_len,
721 const char *unit_id,
722 int priority) {
723
724 int rl;
725 char *path = NULL, *c;
726
727 assert(s);
728 assert(iovec || n == 0);
729
730 if (n == 0)
731 return;
732
733 if (LOG_PRI(priority) > s->max_level_store)
734 return;
735
736 if (!ucred)
737 goto finish;
738
739 path = shortened_cgroup_path(ucred->pid);
740 if (!path)
741 goto finish;
742
743 /* example: /user/lennart/3/foobar
744 * /system/dbus.service/foobar
745 *
746 * So let's cut of everything past the third /, since that is
747 * wher user directories start */
748
749 c = strchr(path, '/');
750 if (c) {
751 c = strchr(c+1, '/');
752 if (c) {
753 c = strchr(c+1, '/');
754 if (c)
755 *c = 0;
756 }
757 }
758
759 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available_space(s));
760
761 if (rl == 0) {
762 free(path);
763 return;
764 }
765
766 /* Write a suppression message if we suppressed something */
767 if (rl > 1)
768 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED, "Suppressed %u messages from %s", rl - 1, path);
769
770 free(path);
771
772 finish:
773 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id);
774 }
775
/* Checks whether the l bytes at p form a field name clients may set.
 *
 * This roughly follows the POSIX recommendations for environment variable
 * names, with some additional restrictions:
 *
 * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * A name is accepted if it
 *   - is between 1 and 64 characters long,
 *   - does not start with an underscore (those names are protected),
 *   - does not start with a digit, and
 *   - consists of A-Z, 0-9 and '_' only.
 *
 * p need not be NUL terminated, only the first l bytes are looked at. */
static bool valid_user_field(const char *p, size_t l) {
        size_t i;

        if (l == 0 || l > 64)
                return false;

        if (p[0] == '_')
                return false;

        if (p[0] >= '0' && p[0] <= '9')
                return false;

        for (i = 0; i < l; i++) {
                const char c = p[i];
                const bool upper = c >= 'A' && c <= 'Z';
                const bool digit = c >= '0' && c <= '9';

                if (!upper && !digit && c != '_')
                        return false;
        }

        return true;
}
810
811 static void process_native_message(
812 Server *s,
813 const void *buffer, size_t buffer_size,
814 struct ucred *ucred,
815 struct timeval *tv,
816 const char *label, size_t label_len) {
817
818 struct iovec *iovec = NULL;
819 unsigned n = 0, m = 0, j, tn = (unsigned) -1;
820 const char *p;
821 size_t remaining;
822 int priority = LOG_INFO;
823 char *identifier = NULL, *message = NULL;
824
825 assert(s);
826 assert(buffer || buffer_size == 0);
827
828 p = buffer;
829 remaining = buffer_size;
830
831 while (remaining > 0) {
832 const char *e, *q;
833
834 e = memchr(p, '\n', remaining);
835
836 if (!e) {
837 /* Trailing noise, let's ignore it, and flush what we collected */
838 log_debug("Received message with trailing noise, ignoring.");
839 break;
840 }
841
842 if (e == p) {
843 /* Entry separator */
844 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
845 n = 0;
846 priority = LOG_INFO;
847
848 p++;
849 remaining--;
850 continue;
851 }
852
853 if (*p == '.' || *p == '#') {
854 /* Ignore control commands for now, and
855 * comments too. */
856 remaining -= (e - p) + 1;
857 p = e + 1;
858 continue;
859 }
860
861 /* A property follows */
862
863 if (n+N_IOVEC_META_FIELDS >= m) {
864 struct iovec *c;
865 unsigned u;
866
867 u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
868 c = realloc(iovec, u * sizeof(struct iovec));
869 if (!c) {
870 log_oom();
871 break;
872 }
873
874 iovec = c;
875 m = u;
876 }
877
878 q = memchr(p, '=', e - p);
879 if (q) {
880 if (valid_user_field(p, q - p)) {
881 size_t l;
882
883 l = e - p;
884
885 /* If the field name starts with an
886 * underscore, skip the variable,
887 * since that indidates a trusted
888 * field */
889 iovec[n].iov_base = (char*) p;
890 iovec[n].iov_len = l;
891 n++;
892
893 /* We need to determine the priority
894 * of this entry for the rate limiting
895 * logic */
896 if (l == 10 &&
897 memcmp(p, "PRIORITY=", 9) == 0 &&
898 p[9] >= '0' && p[9] <= '9')
899 priority = (priority & LOG_FACMASK) | (p[9] - '0');
900
901 else if (l == 17 &&
902 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
903 p[16] >= '0' && p[16] <= '9')
904 priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
905
906 else if (l == 18 &&
907 memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
908 p[16] >= '0' && p[16] <= '9' &&
909 p[17] >= '0' && p[17] <= '9')
910 priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
911
912 else if (l >= 19 &&
913 memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
914 char *t;
915
916 t = strndup(p + 18, l - 18);
917 if (t) {
918 free(identifier);
919 identifier = t;
920 }
921 } else if (l >= 8 &&
922 memcmp(p, "MESSAGE=", 8) == 0) {
923 char *t;
924
925 t = strndup(p + 8, l - 8);
926 if (t) {
927 free(message);
928 message = t;
929 }
930 }
931 }
932
933 remaining -= (e - p) + 1;
934 p = e + 1;
935 continue;
936 } else {
937 le64_t l_le;
938 uint64_t l;
939 char *k;
940
941 if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
942 log_debug("Failed to parse message, ignoring.");
943 break;
944 }
945
946 memcpy(&l_le, e + 1, sizeof(uint64_t));
947 l = le64toh(l_le);
948
949 if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
950 e[1+sizeof(uint64_t)+l] != '\n') {
951 log_debug("Failed to parse message, ignoring.");
952 break;
953 }
954
955 k = malloc((e - p) + 1 + l);
956 if (!k) {
957 log_oom();
958 break;
959 }
960
961 memcpy(k, p, e - p);
962 k[e - p] = '=';
963 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
964
965 if (valid_user_field(p, e - p)) {
966 iovec[n].iov_base = k;
967 iovec[n].iov_len = (e - p) + 1 + l;
968 n++;
969 } else
970 free(k);
971
972 remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
973 p = e + 1 + sizeof(uint64_t) + l + 1;
974 }
975 }
976
977 if (n <= 0)
978 goto finish;
979
980 tn = n++;
981 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
982
983 if (message) {
984 if (s->forward_to_syslog)
985 server_forward_syslog(s, priority, identifier, message, ucred, tv);
986
987 if (s->forward_to_kmsg)
988 server_forward_kmsg(s, priority, identifier, message, ucred);
989
990 if (s->forward_to_console)
991 server_forward_console(s, priority, identifier, message, ucred);
992 }
993
994 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
995
996 finish:
997 for (j = 0; j < n; j++) {
998 if (j == tn)
999 continue;
1000
1001 if (iovec[j].iov_base < buffer ||
1002 (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
1003 free(iovec[j].iov_base);
1004 }
1005
1006 free(iovec);
1007 free(identifier);
1008 free(message);
1009 }
1010
1011 static void process_native_file(
1012 Server *s,
1013 int fd,
1014 struct ucred *ucred,
1015 struct timeval *tv,
1016 const char *label, size_t label_len) {
1017
1018 struct stat st;
1019 void *p;
1020 ssize_t n;
1021
1022 assert(s);
1023 assert(fd >= 0);
1024
1025 /* Data is in the passed file, since it didn't fit in a
1026 * datagram. We can't map the file here, since clients might
1027 * then truncate it and trigger a SIGBUS for us. So let's
1028 * stupidly read it */
1029
1030 if (fstat(fd, &st) < 0) {
1031 log_error("Failed to stat passed file, ignoring: %m");
1032 return;
1033 }
1034
1035 if (!S_ISREG(st.st_mode)) {
1036 log_error("File passed is not regular. Ignoring.");
1037 return;
1038 }
1039
1040 if (st.st_size <= 0)
1041 return;
1042
1043 if (st.st_size > ENTRY_SIZE_MAX) {
1044 log_error("File passed too large. Ignoring.");
1045 return;
1046 }
1047
1048 p = malloc(st.st_size);
1049 if (!p) {
1050 log_oom();
1051 return;
1052 }
1053
1054 n = pread(fd, p, st.st_size, 0);
1055 if (n < 0)
1056 log_error("Failed to read file, ignoring: %s", strerror(-n));
1057 else if (n > 0)
1058 process_native_message(s, p, n, ucred, tv, label, label_len);
1059
1060 free(p);
1061 }
1062
1063 static int system_journal_open(Server *s) {
1064 int r;
1065 char *fn;
1066 sd_id128_t machine;
1067 char ids[33];
1068
1069 r = sd_id128_get_machine(&machine);
1070 if (r < 0)
1071 return r;
1072
1073 sd_id128_to_string(machine, ids);
1074
1075 if (!s->system_journal &&
1076 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
1077 access("/run/systemd/journal/flushed", F_OK) >= 0) {
1078
1079 /* If in auto mode: first try to create the machine
1080 * path, but not the prefix.
1081 *
1082 * If in persistent mode: create /var/log/journal and
1083 * the machine path */
1084
1085 if (s->storage == STORAGE_PERSISTENT)
1086 (void) mkdir("/var/log/journal/", 0755);
1087
1088 fn = strappend("/var/log/journal/", ids);
1089 if (!fn)
1090 return -ENOMEM;
1091
1092 (void) mkdir(fn, 0755);
1093 free(fn);
1094
1095 fn = strjoin("/var/log/journal/", ids, "/system.journal", NULL);
1096 if (!fn)
1097 return -ENOMEM;
1098
1099 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
1100 free(fn);
1101
1102 if (r >= 0)
1103 server_fix_perms(s, s->system_journal, 0);
1104 else if (r < 0) {
1105
1106 if (r != -ENOENT && r != -EROFS)
1107 log_warning("Failed to open system journal: %s", strerror(-r));
1108
1109 r = 0;
1110 }
1111 }
1112
1113 if (!s->runtime_journal &&
1114 (s->storage != STORAGE_NONE)) {
1115
1116 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
1117 if (!fn)
1118 return -ENOMEM;
1119
1120 if (s->system_journal) {
1121
1122 /* Try to open the runtime journal, but only
1123 * if it already exists, so that we can flush
1124 * it into the system journal */
1125
1126 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1127 free(fn);
1128
1129 if (r < 0) {
1130 if (r != -ENOENT)
1131 log_warning("Failed to open runtime journal: %s", strerror(-r));
1132
1133 r = 0;
1134 }
1135
1136 } else {
1137
1138 /* OK, we really need the runtime journal, so create
1139 * it if necessary. */
1140
1141 (void) mkdir_parents(fn, 0755);
1142 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1143 free(fn);
1144
1145 if (r < 0) {
1146 log_error("Failed to open runtime journal: %s", strerror(-r));
1147 return r;
1148 }
1149 }
1150
1151 if (s->runtime_journal)
1152 server_fix_perms(s, s->runtime_journal, 0);
1153 }
1154
1155 return r;
1156 }
1157
1158 static int server_flush_to_var(Server *s) {
1159 Object *o = NULL;
1160 int r;
1161 sd_id128_t machine;
1162 sd_journal *j;
1163
1164 assert(s);
1165
1166 if (s->storage != STORAGE_AUTO &&
1167 s->storage != STORAGE_PERSISTENT)
1168 return 0;
1169
1170 if (!s->runtime_journal)
1171 return 0;
1172
1173 system_journal_open(s);
1174
1175 if (!s->system_journal)
1176 return 0;
1177
1178 log_info("Flushing to /var...");
1179
1180 r = sd_id128_get_machine(&machine);
1181 if (r < 0) {
1182 log_error("Failed to get machine id: %s", strerror(-r));
1183 return r;
1184 }
1185
1186 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1187 if (r < 0) {
1188 log_error("Failed to read runtime journal: %s", strerror(-r));
1189 return r;
1190 }
1191
1192 SD_JOURNAL_FOREACH(j) {
1193 JournalFile *f;
1194
1195 f = j->current_file;
1196 assert(f && f->current_offset > 0);
1197
1198 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1199 if (r < 0) {
1200 log_error("Can't read entry: %s", strerror(-r));
1201 goto finish;
1202 }
1203
1204 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1205 if (r == -E2BIG) {
1206 log_info("Allocation limit reached.");
1207
1208 journal_file_post_change(s->system_journal);
1209 server_rotate(s);
1210 server_vacuum(s);
1211
1212 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1213 }
1214
1215 if (r < 0) {
1216 log_error("Can't write entry: %s", strerror(-r));
1217 goto finish;
1218 }
1219 }
1220
1221 finish:
1222 journal_file_post_change(s->system_journal);
1223
1224 journal_file_close(s->runtime_journal);
1225 s->runtime_journal = NULL;
1226
1227 if (r >= 0)
1228 rm_rf("/run/log/journal", false, true, false);
1229
1230 return r;
1231 }
1232
1233 static int process_event(Server *s, struct epoll_event *ev) {
1234 assert(s);
1235 assert(ev);
1236
1237 if (ev->data.fd == s->signal_fd) {
1238 struct signalfd_siginfo sfsi;
1239 ssize_t n;
1240
1241 if (ev->events != EPOLLIN) {
1242 log_info("Got invalid event from epoll.");
1243 return -EIO;
1244 }
1245
1246 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1247 if (n != sizeof(sfsi)) {
1248
1249 if (n >= 0)
1250 return -EIO;
1251
1252 if (errno == EINTR || errno == EAGAIN)
1253 return 1;
1254
1255 return -errno;
1256 }
1257
1258 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1259
1260 if (sfsi.ssi_signo == SIGUSR1) {
1261 touch("/run/systemd/journal/flushed");
1262 server_flush_to_var(s);
1263 return 1;
1264 }
1265
1266 if (sfsi.ssi_signo == SIGUSR2) {
1267 server_rotate(s);
1268 server_vacuum(s);
1269 return 1;
1270 }
1271
1272 return 0;
1273
1274 } else if (ev->data.fd == s->dev_kmsg_fd) {
1275 int r;
1276
1277 if (ev->events != EPOLLIN) {
1278 log_info("Got invalid event from epoll.");
1279 return -EIO;
1280 }
1281
1282 r = server_read_dev_kmsg(s);
1283 if (r < 0)
1284 return r;
1285
1286 return 1;
1287
1288 } else if (ev->data.fd == s->native_fd ||
1289 ev->data.fd == s->syslog_fd) {
1290
1291 if (ev->events != EPOLLIN) {
1292 log_info("Got invalid event from epoll.");
1293 return -EIO;
1294 }
1295
1296 for (;;) {
1297 struct msghdr msghdr;
1298 struct iovec iovec;
1299 struct ucred *ucred = NULL;
1300 struct timeval *tv = NULL;
1301 struct cmsghdr *cmsg;
1302 char *label = NULL;
1303 size_t label_len = 0;
1304 union {
1305 struct cmsghdr cmsghdr;
1306
1307 /* We use NAME_MAX space for the
1308 * SELinux label here. The kernel
1309 * currently enforces no limit, but
1310 * according to suggestions from the
1311 * SELinux people this will change and
1312 * it will probably be identical to
1313 * NAME_MAX. For now we use that, but
1314 * this should be updated one day when
1315 * the final limit is known.*/
1316 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1317 CMSG_SPACE(sizeof(struct timeval)) +
1318 CMSG_SPACE(sizeof(int)) + /* fd */
1319 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1320 } control;
1321 ssize_t n;
1322 int v;
1323 int *fds = NULL;
1324 unsigned n_fds = 0;
1325
1326 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1327 log_error("SIOCINQ failed: %m");
1328 return -errno;
1329 }
1330
1331 if (s->buffer_size < (size_t) v) {
1332 void *b;
1333 size_t l;
1334
1335 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1336 b = realloc(s->buffer, l+1);
1337
1338 if (!b) {
1339 log_error("Couldn't increase buffer.");
1340 return -ENOMEM;
1341 }
1342
1343 s->buffer_size = l;
1344 s->buffer = b;
1345 }
1346
1347 zero(iovec);
1348 iovec.iov_base = s->buffer;
1349 iovec.iov_len = s->buffer_size;
1350
1351 zero(control);
1352 zero(msghdr);
1353 msghdr.msg_iov = &iovec;
1354 msghdr.msg_iovlen = 1;
1355 msghdr.msg_control = &control;
1356 msghdr.msg_controllen = sizeof(control);
1357
1358 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1359 if (n < 0) {
1360
1361 if (errno == EINTR || errno == EAGAIN)
1362 return 1;
1363
1364 log_error("recvmsg() failed: %m");
1365 return -errno;
1366 }
1367
1368 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1369
1370 if (cmsg->cmsg_level == SOL_SOCKET &&
1371 cmsg->cmsg_type == SCM_CREDENTIALS &&
1372 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1373 ucred = (struct ucred*) CMSG_DATA(cmsg);
1374 else if (cmsg->cmsg_level == SOL_SOCKET &&
1375 cmsg->cmsg_type == SCM_SECURITY) {
1376 label = (char*) CMSG_DATA(cmsg);
1377 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1378 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1379 cmsg->cmsg_type == SO_TIMESTAMP &&
1380 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1381 tv = (struct timeval*) CMSG_DATA(cmsg);
1382 else if (cmsg->cmsg_level == SOL_SOCKET &&
1383 cmsg->cmsg_type == SCM_RIGHTS) {
1384 fds = (int*) CMSG_DATA(cmsg);
1385 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1386 }
1387 }
1388
1389 if (ev->data.fd == s->syslog_fd) {
1390 char *e;
1391
1392 if (n > 0 && n_fds == 0) {
1393 e = memchr(s->buffer, '\n', n);
1394 if (e)
1395 *e = 0;
1396 else
1397 s->buffer[n] = 0;
1398
1399 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1400 } else if (n_fds > 0)
1401 log_warning("Got file descriptors via syslog socket. Ignoring.");
1402
1403 } else {
1404 if (n > 0 && n_fds == 0)
1405 process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1406 else if (n == 0 && n_fds == 1)
1407 process_native_file(s, fds[0], ucred, tv, label, label_len);
1408 else if (n_fds > 0)
1409 log_warning("Got too many file descriptors via native socket. Ignoring.");
1410 }
1411
1412 close_many(fds, n_fds);
1413 }
1414
1415 return 1;
1416
1417 } else if (ev->data.fd == s->stdout_fd) {
1418
1419 if (ev->events != EPOLLIN) {
1420 log_info("Got invalid event from epoll.");
1421 return -EIO;
1422 }
1423
1424 stdout_stream_new(s);
1425 return 1;
1426
1427 } else {
1428 StdoutStream *stream;
1429
1430 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
1431 log_info("Got invalid event from epoll.");
1432 return -EIO;
1433 }
1434
1435 /* If it is none of the well-known fds, it must be an
1436 * stdout stream fd. Note that this is a bit ugly here
1437 * (since we rely that none of the well-known fds
1438 * could be interpreted as pointer), but nonetheless
1439 * safe, since the well-known fds would never get an
1440 * fd > 4096, i.e. beyond the first memory page */
1441
1442 stream = ev->data.ptr;
1443
1444 if (stdout_stream_process(stream) <= 0)
1445 stdout_stream_free(stream);
1446
1447 return 1;
1448 }
1449
1450 log_error("Unknown event.");
1451 return 0;
1452 }
1453
1454
1455 static int open_native_socket(Server*s) {
1456 union sockaddr_union sa;
1457 int one, r;
1458 struct epoll_event ev;
1459
1460 assert(s);
1461
1462 if (s->native_fd < 0) {
1463
1464 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1465 if (s->native_fd < 0) {
1466 log_error("socket() failed: %m");
1467 return -errno;
1468 }
1469
1470 zero(sa);
1471 sa.un.sun_family = AF_UNIX;
1472 strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
1473
1474 unlink(sa.un.sun_path);
1475
1476 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
1477 if (r < 0) {
1478 log_error("bind() failed: %m");
1479 return -errno;
1480 }
1481
1482 chmod(sa.un.sun_path, 0666);
1483 } else
1484 fd_nonblock(s->native_fd, 1);
1485
1486 one = 1;
1487 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
1488 if (r < 0) {
1489 log_error("SO_PASSCRED failed: %m");
1490 return -errno;
1491 }
1492
1493 #ifdef HAVE_SELINUX
1494 one = 1;
1495 r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
1496 if (r < 0)
1497 log_warning("SO_PASSSEC failed: %m");
1498 #endif
1499
1500 one = 1;
1501 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
1502 if (r < 0) {
1503 log_error("SO_TIMESTAMP failed: %m");
1504 return -errno;
1505 }
1506
1507 zero(ev);
1508 ev.events = EPOLLIN;
1509 ev.data.fd = s->native_fd;
1510 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
1511 log_error("Failed to add native server fd to epoll object: %m");
1512 return -errno;
1513 }
1514
1515 return 0;
1516 }
1517
1518
1519 static int open_signalfd(Server *s) {
1520 sigset_t mask;
1521 struct epoll_event ev;
1522
1523 assert(s);
1524
1525 assert_se(sigemptyset(&mask) == 0);
1526 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1527 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1528
1529 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1530 if (s->signal_fd < 0) {
1531 log_error("signalfd(): %m");
1532 return -errno;
1533 }
1534
1535 zero(ev);
1536 ev.events = EPOLLIN;
1537 ev.data.fd = s->signal_fd;
1538
1539 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1540 log_error("epoll_ctl(): %m");
1541 return -errno;
1542 }
1543
1544 return 0;
1545 }
1546
1547 static int server_parse_proc_cmdline(Server *s) {
1548 char *line, *w, *state;
1549 int r;
1550 size_t l;
1551
1552 if (detect_container(NULL) > 0)
1553 return 0;
1554
1555 r = read_one_line_file("/proc/cmdline", &line);
1556 if (r < 0) {
1557 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1558 return 0;
1559 }
1560
1561 FOREACH_WORD_QUOTED(w, l, line, state) {
1562 char *word;
1563
1564 word = strndup(w, l);
1565 if (!word) {
1566 r = -ENOMEM;
1567 goto finish;
1568 }
1569
1570 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1571 r = parse_boolean(word + 35);
1572 if (r < 0)
1573 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1574 else
1575 s->forward_to_syslog = r;
1576 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1577 r = parse_boolean(word + 33);
1578 if (r < 0)
1579 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1580 else
1581 s->forward_to_kmsg = r;
1582 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1583 r = parse_boolean(word + 36);
1584 if (r < 0)
1585 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1586 else
1587 s->forward_to_console = r;
1588 } else if (startswith(word, "systemd.journald"))
1589 log_warning("Invalid systemd.journald parameter. Ignoring.");
1590
1591 free(word);
1592 }
1593
1594 r = 0;
1595
1596 finish:
1597 free(line);
1598 return r;
1599 }
1600
1601 static int server_parse_config_file(Server *s) {
1602 FILE *f;
1603 const char *fn;
1604 int r;
1605
1606 assert(s);
1607
1608 fn = "/etc/systemd/journald.conf";
1609 f = fopen(fn, "re");
1610 if (!f) {
1611 if (errno == ENOENT)
1612 return 0;
1613
1614 log_warning("Failed to open configuration file %s: %m", fn);
1615 return -errno;
1616 }
1617
1618 r = config_parse(fn, f, "Journal\0", config_item_perf_lookup, (void*) journald_gperf_lookup, false, s);
1619 if (r < 0)
1620 log_warning("Failed to parse configuration file: %s", strerror(-r));
1621
1622 fclose(f);
1623
1624 return r;
1625 }
1626
1627 static int server_init(Server *s) {
1628 int n, r, fd;
1629
1630 assert(s);
1631
1632 zero(*s);
1633 s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
1634 s->compress = true;
1635 s->seal = true;
1636
1637 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1638 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1639
1640 s->forward_to_syslog = true;
1641
1642 s->max_level_store = LOG_DEBUG;
1643 s->max_level_syslog = LOG_DEBUG;
1644 s->max_level_kmsg = LOG_NOTICE;
1645 s->max_level_console = LOG_INFO;
1646
1647 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1648 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1649
1650 server_parse_config_file(s);
1651 server_parse_proc_cmdline(s);
1652
1653 mkdir_p("/run/systemd/journal", 0755);
1654
1655 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1656 if (!s->user_journals)
1657 return log_oom();
1658
1659 s->mmap = mmap_cache_new();
1660 if (!s->mmap)
1661 return log_oom();
1662
1663 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1664 if (s->epoll_fd < 0) {
1665 log_error("Failed to create epoll object: %m");
1666 return -errno;
1667 }
1668
1669 n = sd_listen_fds(true);
1670 if (n < 0) {
1671 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1672 return n;
1673 }
1674
1675 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1676
1677 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1678
1679 if (s->native_fd >= 0) {
1680 log_error("Too many native sockets passed.");
1681 return -EINVAL;
1682 }
1683
1684 s->native_fd = fd;
1685
1686 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1687
1688 if (s->stdout_fd >= 0) {
1689 log_error("Too many stdout sockets passed.");
1690 return -EINVAL;
1691 }
1692
1693 s->stdout_fd = fd;
1694
1695 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1696
1697 if (s->syslog_fd >= 0) {
1698 log_error("Too many /dev/log sockets passed.");
1699 return -EINVAL;
1700 }
1701
1702 s->syslog_fd = fd;
1703
1704 } else {
1705 log_error("Unknown socket passed.");
1706 return -EINVAL;
1707 }
1708 }
1709
1710 r = server_open_syslog_socket(s);
1711 if (r < 0)
1712 return r;
1713
1714 r = open_native_socket(s);
1715 if (r < 0)
1716 return r;
1717
1718 r = server_open_stdout_socket(s);
1719 if (r < 0)
1720 return r;
1721
1722 r = server_open_dev_kmsg(s);
1723 if (r < 0)
1724 return r;
1725
1726 r = server_open_kernel_seqnum(s);
1727 if (r < 0)
1728 return r;
1729
1730 r = open_signalfd(s);
1731 if (r < 0)
1732 return r;
1733
1734 s->udev = udev_new();
1735 if (!s->udev)
1736 return -ENOMEM;
1737
1738 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1739 if (!s->rate_limit)
1740 return -ENOMEM;
1741
1742 r = system_journal_open(s);
1743 if (r < 0)
1744 return r;
1745
1746 return 0;
1747 }
1748
1749 static void maybe_append_tags(Server *s) {
1750 #ifdef HAVE_GCRYPT
1751 JournalFile *f;
1752 Iterator i;
1753 usec_t n;
1754
1755 n = now(CLOCK_REALTIME);
1756
1757 if (s->system_journal)
1758 journal_file_maybe_append_tag(s->system_journal, n);
1759
1760 HASHMAP_FOREACH(f, s->user_journals, i)
1761 journal_file_maybe_append_tag(f, n);
1762 #endif
1763 }
1764
1765 static void server_done(Server *s) {
1766 JournalFile *f;
1767 assert(s);
1768
1769 while (s->stdout_streams)
1770 stdout_stream_free(s->stdout_streams);
1771
1772 if (s->system_journal)
1773 journal_file_close(s->system_journal);
1774
1775 if (s->runtime_journal)
1776 journal_file_close(s->runtime_journal);
1777
1778 while ((f = hashmap_steal_first(s->user_journals)))
1779 journal_file_close(f);
1780
1781 hashmap_free(s->user_journals);
1782
1783 if (s->epoll_fd >= 0)
1784 close_nointr_nofail(s->epoll_fd);
1785
1786 if (s->signal_fd >= 0)
1787 close_nointr_nofail(s->signal_fd);
1788
1789 if (s->syslog_fd >= 0)
1790 close_nointr_nofail(s->syslog_fd);
1791
1792 if (s->native_fd >= 0)
1793 close_nointr_nofail(s->native_fd);
1794
1795 if (s->stdout_fd >= 0)
1796 close_nointr_nofail(s->stdout_fd);
1797
1798 if (s->dev_kmsg_fd >= 0)
1799 close_nointr_nofail(s->dev_kmsg_fd);
1800
1801 if (s->rate_limit)
1802 journal_rate_limit_free(s->rate_limit);
1803
1804 if (s->kernel_seqnum)
1805 munmap(s->kernel_seqnum, sizeof(uint64_t));
1806
1807 free(s->buffer);
1808 free(s->tty_path);
1809
1810 if (s->mmap)
1811 mmap_cache_unref(s->mmap);
1812
1813 if (s->udev)
1814 udev_unref(s->udev);
1815 }
1816
1817 int main(int argc, char *argv[]) {
1818 Server server;
1819 int r;
1820
1821 /* if (getppid() != 1) { */
1822 /* log_error("This program should be invoked by init only."); */
1823 /* return EXIT_FAILURE; */
1824 /* } */
1825
1826 if (argc > 1) {
1827 log_error("This program does not take arguments.");
1828 return EXIT_FAILURE;
1829 }
1830
1831 log_set_target(LOG_TARGET_SAFE);
1832 log_set_facility(LOG_SYSLOG);
1833 log_set_max_level(LOG_DEBUG);
1834 log_parse_environment();
1835 log_open();
1836
1837 umask(0022);
1838
1839 r = server_init(&server);
1840 if (r < 0)
1841 goto finish;
1842
1843 server_vacuum(&server);
1844 server_flush_to_var(&server);
1845 server_flush_dev_kmsg(&server);
1846
1847 log_debug("systemd-journald running as pid %lu", (unsigned long) getpid());
1848 server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started");
1849
1850 sd_notify(false,
1851 "READY=1\n"
1852 "STATUS=Processing requests...");
1853
1854 for (;;) {
1855 struct epoll_event event;
1856 int t;
1857
1858 #ifdef HAVE_GCRYPT
1859 usec_t u;
1860
1861 if (server.system_journal &&
1862 journal_file_next_evolve_usec(server.system_journal, &u)) {
1863 usec_t n;
1864
1865 n = now(CLOCK_REALTIME);
1866
1867 if (n >= u)
1868 t = 0;
1869 else
1870 t = (int) ((u - n + USEC_PER_MSEC - 1) / USEC_PER_MSEC);
1871 } else
1872 #endif
1873 t = -1;
1874
1875 r = epoll_wait(server.epoll_fd, &event, 1, t);
1876 if (r < 0) {
1877
1878 if (errno == EINTR)
1879 continue;
1880
1881 log_error("epoll_wait() failed: %m");
1882 r = -errno;
1883 goto finish;
1884 }
1885
1886 if (r > 0) {
1887 r = process_event(&server, &event);
1888 if (r < 0)
1889 goto finish;
1890 else if (r == 0)
1891 break;
1892 }
1893
1894 maybe_append_tags(&server);
1895 }
1896
1897 log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid());
1898 server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped");
1899
1900 finish:
1901 sd_notify(false,
1902 "STATUS=Shutting down...");
1903
1904 server_done(&server);
1905
1906 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
1907 }