]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
journal: restore watchdog support
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "formats-util.h"
45 #include "fs-util.h"
46 #include "hashmap.h"
47 #include "hostname-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "missing.h"
61 #include "mkdir.h"
62 #include "parse-util.h"
63 #include "proc-cmdline.h"
64 #include "process-util.h"
65 #include "rm-rf.h"
66 #include "selinux-util.h"
67 #include "signal-util.h"
68 #include "socket-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
71
72 #define USER_JOURNALS_MAX 1024
73
74 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
75 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 1000
77 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
78
79 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
80
81 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
82
83 static int determine_space_for(
84 Server *s,
85 JournalMetrics *metrics,
86 const char *path,
87 const char *name,
88 bool verbose,
89 bool patch_min_use,
90 uint64_t *available,
91 uint64_t *limit) {
92
93 uint64_t sum = 0, ss_avail, avail;
94 _cleanup_closedir_ DIR *d = NULL;
95 struct dirent *de;
96 struct statvfs ss;
97 const char *p;
98 usec_t ts;
99
100 assert(s);
101 assert(metrics);
102 assert(path);
103 assert(name);
104
105 ts = now(CLOCK_MONOTONIC);
106
107 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
108
109 if (available)
110 *available = s->cached_space_available;
111 if (limit)
112 *limit = s->cached_space_limit;
113
114 return 0;
115 }
116
117 p = strjoina(path, SERVER_MACHINE_ID(s));
118 d = opendir(p);
119 if (!d)
120 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
121
122 if (fstatvfs(dirfd(d), &ss) < 0)
123 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
124
125 FOREACH_DIRENT_ALL(de, d, break) {
126 struct stat st;
127
128 if (!endswith(de->d_name, ".journal") &&
129 !endswith(de->d_name, ".journal~"))
130 continue;
131
132 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
133 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
134 continue;
135 }
136
137 if (!S_ISREG(st.st_mode))
138 continue;
139
140 sum += (uint64_t) st.st_blocks * 512UL;
141 }
142
143 /* If request, then let's bump the min_use limit to the
144 * current usage on disk. We do this when starting up and
145 * first opening the journal files. This way sudden spikes in
146 * disk usage will not cause journald to vacuum files without
147 * bounds. Note that this means that only a restart of
148 * journald will make it reset this value. */
149
150 if (patch_min_use)
151 metrics->min_use = MAX(metrics->min_use, sum);
152
153 ss_avail = ss.f_bsize * ss.f_bavail;
154 avail = LESS_BY(ss_avail, metrics->keep_free);
155
156 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
157 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
158 s->cached_space_timestamp = ts;
159
160 if (verbose) {
161 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
162 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
163
164 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
165 "%s (%s) is currently using %s.\n"
166 "Maximum allowed usage is set to %s.\n"
167 "Leaving at least %s free (of currently available %s of space).\n"
168 "Enforced usage limit is thus %s, of which %s are still available.",
169 name, path,
170 format_bytes(fb1, sizeof(fb1), sum),
171 format_bytes(fb2, sizeof(fb2), metrics->max_use),
172 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
173 format_bytes(fb4, sizeof(fb4), ss_avail),
174 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
175 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
176 }
177
178 if (available)
179 *available = s->cached_space_available;
180 if (limit)
181 *limit = s->cached_space_limit;
182
183 return 1;
184 }
185
186 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
187 JournalMetrics *metrics;
188 const char *path, *name;
189
190 assert(s);
191
192 if (s->system_journal) {
193 path = "/var/log/journal/";
194 metrics = &s->system_metrics;
195 name = "System journal";
196 } else {
197 path = "/run/log/journal/";
198 metrics = &s->runtime_metrics;
199 name = "Runtime journal";
200 }
201
202 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
203 }
204
205 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
206 int r;
207 #ifdef HAVE_ACL
208 _cleanup_(acl_freep) acl_t acl = NULL;
209 acl_entry_t entry;
210 acl_permset_t permset;
211 #endif
212
213 assert(f);
214
215 r = fchmod(f->fd, 0640);
216 if (r < 0)
217 log_warning_errno(errno, "Failed to fix access mode on %s, ignoring: %m", f->path);
218
219 #ifdef HAVE_ACL
220 if (uid <= SYSTEM_UID_MAX)
221 return;
222
223 acl = acl_get_fd(f->fd);
224 if (!acl) {
225 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
226 return;
227 }
228
229 r = acl_find_uid(acl, uid, &entry);
230 if (r <= 0) {
231
232 if (acl_create_entry(&acl, &entry) < 0 ||
233 acl_set_tag_type(entry, ACL_USER) < 0 ||
234 acl_set_qualifier(entry, &uid) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236 return;
237 }
238 }
239
240 /* We do not recalculate the mask unconditionally here,
241 * so that the fchmod() mask above stays intact. */
242 if (acl_get_permset(entry, &permset) < 0 ||
243 acl_add_perm(permset, ACL_READ) < 0 ||
244 calc_acl_mask_if_needed(&acl) < 0) {
245 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
246 return;
247 }
248
249 if (acl_set_fd(f->fd, acl) < 0)
250 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
251
252 #endif
253 }
254
255 static JournalFile* find_journal(Server *s, uid_t uid) {
256 _cleanup_free_ char *p = NULL;
257 int r;
258 JournalFile *f;
259 sd_id128_t machine;
260
261 assert(s);
262
263 /* We split up user logs only on /var, not on /run. If the
264 * runtime file is open, we write to it exclusively, in order
265 * to guarantee proper order as soon as we flush /run to
266 * /var and close the runtime file. */
267
268 if (s->runtime_journal)
269 return s->runtime_journal;
270
271 if (uid <= SYSTEM_UID_MAX)
272 return s->system_journal;
273
274 r = sd_id128_get_machine(&machine);
275 if (r < 0)
276 return s->system_journal;
277
278 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
279 if (f)
280 return f;
281
282 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
283 SD_ID128_FORMAT_VAL(machine), uid) < 0)
284 return s->system_journal;
285
286 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
287 /* Too many open? Then let's close one */
288 f = ordered_hashmap_steal_first(s->user_journals);
289 assert(f);
290 journal_file_close(f);
291 }
292
293 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
294 if (r < 0)
295 return s->system_journal;
296
297 server_fix_perms(s, f, uid);
298
299 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
300 if (r < 0) {
301 journal_file_close(f);
302 return s->system_journal;
303 }
304
305 return f;
306 }
307
308 static int do_rotate(
309 Server *s,
310 JournalFile **f,
311 const char* name,
312 bool seal,
313 uint32_t uid) {
314
315 int r;
316 assert(s);
317
318 if (!*f)
319 return -EINVAL;
320
321 r = journal_file_rotate(f, s->compress, seal);
322 if (r < 0)
323 if (*f)
324 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
325 else
326 log_error_errno(r, "Failed to create new %s journal: %m", name);
327 else
328 server_fix_perms(s, *f, uid);
329
330 return r;
331 }
332
333 void server_rotate(Server *s) {
334 JournalFile *f;
335 void *k;
336 Iterator i;
337 int r;
338
339 log_debug("Rotating...");
340
341 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
342 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
343
344 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
345 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
346 if (r >= 0)
347 ordered_hashmap_replace(s->user_journals, k, f);
348 else if (!f)
349 /* Old file has been closed and deallocated */
350 ordered_hashmap_remove(s->user_journals, k);
351 }
352 }
353
354 void server_sync(Server *s) {
355 JournalFile *f;
356 void *k;
357 Iterator i;
358 int r;
359
360 if (s->system_journal) {
361 r = journal_file_set_offline(s->system_journal);
362 if (r < 0)
363 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
364 }
365
366 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
367 r = journal_file_set_offline(f);
368 if (r < 0)
369 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
370 }
371
372 if (s->sync_event_source) {
373 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
374 if (r < 0)
375 log_error_errno(r, "Failed to disable sync timer source: %m");
376 }
377
378 s->sync_scheduled = false;
379 }
380
381 static void do_vacuum(
382 Server *s,
383 JournalFile *f,
384 JournalMetrics *metrics,
385 const char *path,
386 const char *name,
387 bool verbose,
388 bool patch_min_use) {
389
390 const char *p;
391 uint64_t limit;
392 int r;
393
394 assert(s);
395 assert(metrics);
396 assert(path);
397 assert(name);
398
399 if (!f)
400 return;
401
402 p = strjoina(path, SERVER_MACHINE_ID(s));
403
404 limit = metrics->max_use;
405 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
406
407 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
408 if (r < 0 && r != -ENOENT)
409 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
410 }
411
412 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
413 assert(s);
414
415 log_debug("Vacuuming...");
416
417 s->oldest_file_usec = 0;
418
419 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
420 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
421
422 s->cached_space_limit = 0;
423 s->cached_space_available = 0;
424 s->cached_space_timestamp = 0;
425
426 return 0;
427 }
428
429 static void server_cache_machine_id(Server *s) {
430 sd_id128_t id;
431 int r;
432
433 assert(s);
434
435 r = sd_id128_get_machine(&id);
436 if (r < 0)
437 return;
438
439 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
440 }
441
442 static void server_cache_boot_id(Server *s) {
443 sd_id128_t id;
444 int r;
445
446 assert(s);
447
448 r = sd_id128_get_boot(&id);
449 if (r < 0)
450 return;
451
452 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
453 }
454
455 static void server_cache_hostname(Server *s) {
456 _cleanup_free_ char *t = NULL;
457 char *x;
458
459 assert(s);
460
461 t = gethostname_malloc();
462 if (!t)
463 return;
464
465 x = strappend("_HOSTNAME=", t);
466 if (!x)
467 return;
468
469 free(s->hostname_field);
470 s->hostname_field = x;
471 }
472
473 static bool shall_try_append_again(JournalFile *f, int r) {
474
475 /* -E2BIG Hit configured limit
476 -EFBIG Hit fs limit
477 -EDQUOT Quota limit hit
478 -ENOSPC Disk full
479 -EIO I/O error of some kind (mmap)
480 -EHOSTDOWN Other machine
481 -EBUSY Unclean shutdown
482 -EPROTONOSUPPORT Unsupported feature
483 -EBADMSG Corrupted
484 -ENODATA Truncated
485 -ESHUTDOWN Already archived
486 -EIDRM Journal file has been deleted */
487
488 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
489 log_debug("%s: Allocation limit reached, rotating.", f->path);
490 else if (r == -EHOSTDOWN)
491 log_info("%s: Journal file from other machine, rotating.", f->path);
492 else if (r == -EBUSY)
493 log_info("%s: Unclean shutdown, rotating.", f->path);
494 else if (r == -EPROTONOSUPPORT)
495 log_info("%s: Unsupported feature, rotating.", f->path);
496 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
497 log_warning("%s: Journal file corrupted, rotating.", f->path);
498 else if (r == -EIO)
499 log_warning("%s: IO error, rotating.", f->path);
500 else if (r == -EIDRM)
501 log_warning("%s: Journal file has been deleted, rotating.", f->path);
502 else
503 return false;
504
505 return true;
506 }
507
508 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
509 JournalFile *f;
510 bool vacuumed = false;
511 int r;
512
513 assert(s);
514 assert(iovec);
515 assert(n > 0);
516
517 f = find_journal(s, uid);
518 if (!f)
519 return;
520
521 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
522 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
523 server_rotate(s);
524 server_vacuum(s, false, false);
525 vacuumed = true;
526
527 f = find_journal(s, uid);
528 if (!f)
529 return;
530 }
531
532 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
533 if (r >= 0) {
534 server_schedule_sync(s, priority);
535 return;
536 }
537
538 if (vacuumed || !shall_try_append_again(f, r)) {
539 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
540 return;
541 }
542
543 server_rotate(s);
544 server_vacuum(s, false, false);
545
546 f = find_journal(s, uid);
547 if (!f)
548 return;
549
550 log_debug("Retrying write.");
551 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
552 if (r < 0)
553 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
554 else
555 server_schedule_sync(s, priority);
556 }
557
558 static void dispatch_message_real(
559 Server *s,
560 struct iovec *iovec, unsigned n, unsigned m,
561 const struct ucred *ucred,
562 const struct timeval *tv,
563 const char *label, size_t label_len,
564 const char *unit_id,
565 int priority,
566 pid_t object_pid) {
567
568 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
569 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
570 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
571 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
572 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
573 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
574 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
575 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
576 uid_t object_uid;
577 gid_t object_gid;
578 char *x;
579 int r;
580 char *t, *c;
581 uid_t realuid = 0, owner = 0, journal_uid;
582 bool owner_valid = false;
583 #ifdef HAVE_AUDIT
584 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
585 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
586 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
587 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
588
589 uint32_t audit;
590 uid_t loginuid;
591 #endif
592
593 assert(s);
594 assert(iovec);
595 assert(n > 0);
596 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
597
598 if (ucred) {
599 realuid = ucred->uid;
600
601 sprintf(pid, "_PID="PID_FMT, ucred->pid);
602 IOVEC_SET_STRING(iovec[n++], pid);
603
604 sprintf(uid, "_UID="UID_FMT, ucred->uid);
605 IOVEC_SET_STRING(iovec[n++], uid);
606
607 sprintf(gid, "_GID="GID_FMT, ucred->gid);
608 IOVEC_SET_STRING(iovec[n++], gid);
609
610 r = get_process_comm(ucred->pid, &t);
611 if (r >= 0) {
612 x = strjoina("_COMM=", t);
613 free(t);
614 IOVEC_SET_STRING(iovec[n++], x);
615 }
616
617 r = get_process_exe(ucred->pid, &t);
618 if (r >= 0) {
619 x = strjoina("_EXE=", t);
620 free(t);
621 IOVEC_SET_STRING(iovec[n++], x);
622 }
623
624 r = get_process_cmdline(ucred->pid, 0, false, &t);
625 if (r >= 0) {
626 x = strjoina("_CMDLINE=", t);
627 free(t);
628 IOVEC_SET_STRING(iovec[n++], x);
629 }
630
631 r = get_process_capeff(ucred->pid, &t);
632 if (r >= 0) {
633 x = strjoina("_CAP_EFFECTIVE=", t);
634 free(t);
635 IOVEC_SET_STRING(iovec[n++], x);
636 }
637
638 #ifdef HAVE_AUDIT
639 r = audit_session_from_pid(ucred->pid, &audit);
640 if (r >= 0) {
641 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
642 IOVEC_SET_STRING(iovec[n++], audit_session);
643 }
644
645 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
646 if (r >= 0) {
647 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
648 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
649 }
650 #endif
651
652 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
653 if (r >= 0) {
654 char *session = NULL;
655
656 x = strjoina("_SYSTEMD_CGROUP=", c);
657 IOVEC_SET_STRING(iovec[n++], x);
658
659 r = cg_path_get_session(c, &t);
660 if (r >= 0) {
661 session = strjoina("_SYSTEMD_SESSION=", t);
662 free(t);
663 IOVEC_SET_STRING(iovec[n++], session);
664 }
665
666 if (cg_path_get_owner_uid(c, &owner) >= 0) {
667 owner_valid = true;
668
669 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
670 IOVEC_SET_STRING(iovec[n++], owner_uid);
671 }
672
673 if (cg_path_get_unit(c, &t) >= 0) {
674 x = strjoina("_SYSTEMD_UNIT=", t);
675 free(t);
676 IOVEC_SET_STRING(iovec[n++], x);
677 } else if (unit_id && !session) {
678 x = strjoina("_SYSTEMD_UNIT=", unit_id);
679 IOVEC_SET_STRING(iovec[n++], x);
680 }
681
682 if (cg_path_get_user_unit(c, &t) >= 0) {
683 x = strjoina("_SYSTEMD_USER_UNIT=", t);
684 free(t);
685 IOVEC_SET_STRING(iovec[n++], x);
686 } else if (unit_id && session) {
687 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
688 IOVEC_SET_STRING(iovec[n++], x);
689 }
690
691 if (cg_path_get_slice(c, &t) >= 0) {
692 x = strjoina("_SYSTEMD_SLICE=", t);
693 free(t);
694 IOVEC_SET_STRING(iovec[n++], x);
695 }
696
697 free(c);
698 } else if (unit_id) {
699 x = strjoina("_SYSTEMD_UNIT=", unit_id);
700 IOVEC_SET_STRING(iovec[n++], x);
701 }
702
703 #ifdef HAVE_SELINUX
704 if (mac_selinux_use()) {
705 if (label) {
706 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
707
708 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
709 IOVEC_SET_STRING(iovec[n++], x);
710 } else {
711 security_context_t con;
712
713 if (getpidcon(ucred->pid, &con) >= 0) {
714 x = strjoina("_SELINUX_CONTEXT=", con);
715
716 freecon(con);
717 IOVEC_SET_STRING(iovec[n++], x);
718 }
719 }
720 }
721 #endif
722 }
723 assert(n <= m);
724
725 if (object_pid) {
726 r = get_process_uid(object_pid, &object_uid);
727 if (r >= 0) {
728 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
729 IOVEC_SET_STRING(iovec[n++], o_uid);
730 }
731
732 r = get_process_gid(object_pid, &object_gid);
733 if (r >= 0) {
734 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
735 IOVEC_SET_STRING(iovec[n++], o_gid);
736 }
737
738 r = get_process_comm(object_pid, &t);
739 if (r >= 0) {
740 x = strjoina("OBJECT_COMM=", t);
741 free(t);
742 IOVEC_SET_STRING(iovec[n++], x);
743 }
744
745 r = get_process_exe(object_pid, &t);
746 if (r >= 0) {
747 x = strjoina("OBJECT_EXE=", t);
748 free(t);
749 IOVEC_SET_STRING(iovec[n++], x);
750 }
751
752 r = get_process_cmdline(object_pid, 0, false, &t);
753 if (r >= 0) {
754 x = strjoina("OBJECT_CMDLINE=", t);
755 free(t);
756 IOVEC_SET_STRING(iovec[n++], x);
757 }
758
759 #ifdef HAVE_AUDIT
760 r = audit_session_from_pid(object_pid, &audit);
761 if (r >= 0) {
762 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
763 IOVEC_SET_STRING(iovec[n++], o_audit_session);
764 }
765
766 r = audit_loginuid_from_pid(object_pid, &loginuid);
767 if (r >= 0) {
768 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
769 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
770 }
771 #endif
772
773 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
774 if (r >= 0) {
775 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
776 IOVEC_SET_STRING(iovec[n++], x);
777
778 r = cg_path_get_session(c, &t);
779 if (r >= 0) {
780 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
781 free(t);
782 IOVEC_SET_STRING(iovec[n++], x);
783 }
784
785 if (cg_path_get_owner_uid(c, &owner) >= 0) {
786 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
787 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
788 }
789
790 if (cg_path_get_unit(c, &t) >= 0) {
791 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
792 free(t);
793 IOVEC_SET_STRING(iovec[n++], x);
794 }
795
796 if (cg_path_get_user_unit(c, &t) >= 0) {
797 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
798 free(t);
799 IOVEC_SET_STRING(iovec[n++], x);
800 }
801
802 free(c);
803 }
804 }
805 assert(n <= m);
806
807 if (tv) {
808 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
809 IOVEC_SET_STRING(iovec[n++], source_time);
810 }
811
812 /* Note that strictly speaking storing the boot id here is
813 * redundant since the entry includes this in-line
814 * anyway. However, we need this indexed, too. */
815 if (!isempty(s->boot_id_field))
816 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
817
818 if (!isempty(s->machine_id_field))
819 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
820
821 if (!isempty(s->hostname_field))
822 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
823
824 assert(n <= m);
825
826 if (s->split_mode == SPLIT_UID && realuid > 0)
827 /* Split up strictly by any UID */
828 journal_uid = realuid;
829 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
830 /* Split up by login UIDs. We do this only if the
831 * realuid is not root, in order not to accidentally
832 * leak privileged information to the user that is
833 * logged by a privileged process that is part of an
834 * unprivileged session. */
835 journal_uid = owner;
836 else
837 journal_uid = 0;
838
839 write_to_journal(s, journal_uid, iovec, n, priority);
840 }
841
842 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
843 char mid[11 + 32 + 1];
844 char buffer[16 + LINE_MAX + 1];
845 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
846 int n = 0;
847 va_list ap;
848 struct ucred ucred = {};
849
850 assert(s);
851 assert(format);
852
853 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
854 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
855
856 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
857 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
858
859 memcpy(buffer, "MESSAGE=", 8);
860 va_start(ap, format);
861 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
862 va_end(ap);
863 IOVEC_SET_STRING(iovec[n++], buffer);
864
865 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
866 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
867 IOVEC_SET_STRING(iovec[n++], mid);
868 }
869
870 ucred.pid = getpid();
871 ucred.uid = getuid();
872 ucred.gid = getgid();
873
874 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
875 }
876
877 void server_dispatch_message(
878 Server *s,
879 struct iovec *iovec, unsigned n, unsigned m,
880 const struct ucred *ucred,
881 const struct timeval *tv,
882 const char *label, size_t label_len,
883 const char *unit_id,
884 int priority,
885 pid_t object_pid) {
886
887 int rl, r;
888 _cleanup_free_ char *path = NULL;
889 uint64_t available = 0;
890 char *c;
891
892 assert(s);
893 assert(iovec || n == 0);
894
895 if (n == 0)
896 return;
897
898 if (LOG_PRI(priority) > s->max_level_store)
899 return;
900
901 /* Stop early in case the information will not be stored
902 * in a journal. */
903 if (s->storage == STORAGE_NONE)
904 return;
905
906 if (!ucred)
907 goto finish;
908
909 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
910 if (r < 0)
911 goto finish;
912
913 /* example: /user/lennart/3/foobar
914 * /system/dbus.service/foobar
915 *
916 * So let's cut of everything past the third /, since that is
917 * where user directories start */
918
919 c = strchr(path, '/');
920 if (c) {
921 c = strchr(c+1, '/');
922 if (c) {
923 c = strchr(c+1, '/');
924 if (c)
925 *c = 0;
926 }
927 }
928
929 (void) determine_space(s, false, false, &available, NULL);
930 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
931 if (rl == 0)
932 return;
933
934 /* Write a suppression message if we suppressed something */
935 if (rl > 1)
936 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
937 "Suppressed %u messages from %s", rl - 1, path);
938
939 finish:
940 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
941 }
942
943
944 static int system_journal_open(Server *s, bool flush_requested) {
945 const char *fn;
946 int r = 0;
947
948 if (!s->system_journal &&
949 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
950 (flush_requested
951 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
952
953 /* If in auto mode: first try to create the machine
954 * path, but not the prefix.
955 *
956 * If in persistent mode: create /var/log/journal and
957 * the machine path */
958
959 if (s->storage == STORAGE_PERSISTENT)
960 (void) mkdir_p("/var/log/journal/", 0755);
961
962 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
963 (void) mkdir(fn, 0755);
964
965 fn = strjoina(fn, "/system.journal");
966 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
967 if (r >= 0) {
968 server_fix_perms(s, s->system_journal, 0);
969 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
970 } else if (r < 0) {
971 if (r != -ENOENT && r != -EROFS)
972 log_warning_errno(r, "Failed to open system journal: %m");
973
974 r = 0;
975 }
976 }
977
978 if (!s->runtime_journal &&
979 (s->storage != STORAGE_NONE)) {
980
981 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
982
983 if (s->system_journal) {
984
985 /* Try to open the runtime journal, but only
986 * if it already exists, so that we can flush
987 * it into the system journal */
988
989 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
990 if (r < 0) {
991 if (r != -ENOENT)
992 log_warning_errno(r, "Failed to open runtime journal: %m");
993
994 r = 0;
995 }
996
997 } else {
998
999 /* OK, we really need the runtime journal, so create
1000 * it if necessary. */
1001
1002 (void) mkdir("/run/log", 0755);
1003 (void) mkdir("/run/log/journal", 0755);
1004 (void) mkdir_parents(fn, 0750);
1005
1006 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1007 if (r < 0)
1008 return log_error_errno(r, "Failed to open runtime journal: %m");
1009 }
1010
1011 if (s->runtime_journal) {
1012 server_fix_perms(s, s->runtime_journal, 0);
1013 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1014 }
1015 }
1016
1017 return r;
1018 }
1019
1020 int server_flush_to_var(Server *s) {
1021 sd_id128_t machine;
1022 sd_journal *j = NULL;
1023 char ts[FORMAT_TIMESPAN_MAX];
1024 usec_t start;
1025 unsigned n = 0;
1026 int r;
1027
1028 assert(s);
1029
1030 if (s->storage != STORAGE_AUTO &&
1031 s->storage != STORAGE_PERSISTENT)
1032 return 0;
1033
1034 if (!s->runtime_journal)
1035 return 0;
1036
1037 (void) system_journal_open(s, true);
1038
1039 if (!s->system_journal)
1040 return 0;
1041
1042 log_debug("Flushing to /var...");
1043
1044 start = now(CLOCK_MONOTONIC);
1045
1046 r = sd_id128_get_machine(&machine);
1047 if (r < 0)
1048 return r;
1049
1050 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1051 if (r < 0)
1052 return log_error_errno(r, "Failed to read runtime journal: %m");
1053
1054 sd_journal_set_data_threshold(j, 0);
1055
1056 SD_JOURNAL_FOREACH(j) {
1057 Object *o = NULL;
1058 JournalFile *f;
1059
1060 f = j->current_file;
1061 assert(f && f->current_offset > 0);
1062
1063 n++;
1064
1065 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1066 if (r < 0) {
1067 log_error_errno(r, "Can't read entry: %m");
1068 goto finish;
1069 }
1070
1071 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1072 if (r >= 0)
1073 continue;
1074
1075 if (!shall_try_append_again(s->system_journal, r)) {
1076 log_error_errno(r, "Can't write entry: %m");
1077 goto finish;
1078 }
1079
1080 server_rotate(s);
1081 server_vacuum(s, false, false);
1082
1083 if (!s->system_journal) {
1084 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1085 r = -EIO;
1086 goto finish;
1087 }
1088
1089 log_debug("Retrying write.");
1090 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1091 if (r < 0) {
1092 log_error_errno(r, "Can't write entry: %m");
1093 goto finish;
1094 }
1095 }
1096
1097 r = 0;
1098
1099 finish:
1100 journal_file_post_change(s->system_journal);
1101
1102 s->runtime_journal = journal_file_close(s->runtime_journal);
1103
1104 if (r >= 0)
1105 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1106
1107 sd_journal_close(j);
1108
1109 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1110
1111 return r;
1112 }
1113
1114 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1115 Server *s = userdata;
1116 struct ucred *ucred = NULL;
1117 struct timeval *tv = NULL;
1118 struct cmsghdr *cmsg;
1119 char *label = NULL;
1120 size_t label_len = 0, m;
1121 struct iovec iovec;
1122 ssize_t n;
1123 int *fds = NULL, v = 0;
1124 unsigned n_fds = 0;
1125
1126 union {
1127 struct cmsghdr cmsghdr;
1128
1129 /* We use NAME_MAX space for the SELinux label
1130 * here. The kernel currently enforces no
1131 * limit, but according to suggestions from
1132 * the SELinux people this will change and it
1133 * will probably be identical to NAME_MAX. For
1134 * now we use that, but this should be updated
1135 * one day when the final limit is known. */
1136 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1137 CMSG_SPACE(sizeof(struct timeval)) +
1138 CMSG_SPACE(sizeof(int)) + /* fd */
1139 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1140 } control = {};
1141
1142 union sockaddr_union sa = {};
1143
1144 struct msghdr msghdr = {
1145 .msg_iov = &iovec,
1146 .msg_iovlen = 1,
1147 .msg_control = &control,
1148 .msg_controllen = sizeof(control),
1149 .msg_name = &sa,
1150 .msg_namelen = sizeof(sa),
1151 };
1152
1153 assert(s);
1154 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1155
1156 if (revents != EPOLLIN) {
1157 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1158 return -EIO;
1159 }
1160
1161 /* Try to get the right size, if we can. (Not all
1162 * sockets support SIOCINQ, hence we just try, but
1163 * don't rely on it. */
1164 (void) ioctl(fd, SIOCINQ, &v);
1165
1166 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1167 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1168 (size_t) LINE_MAX,
1169 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1170
1171 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1172 return log_oom();
1173
1174 iovec.iov_base = s->buffer;
1175 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1176
1177 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1178 if (n < 0) {
1179 if (errno == EINTR || errno == EAGAIN)
1180 return 0;
1181
1182 return log_error_errno(errno, "recvmsg() failed: %m");
1183 }
1184
1185 CMSG_FOREACH(cmsg, &msghdr) {
1186
1187 if (cmsg->cmsg_level == SOL_SOCKET &&
1188 cmsg->cmsg_type == SCM_CREDENTIALS &&
1189 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1190 ucred = (struct ucred*) CMSG_DATA(cmsg);
1191 else if (cmsg->cmsg_level == SOL_SOCKET &&
1192 cmsg->cmsg_type == SCM_SECURITY) {
1193 label = (char*) CMSG_DATA(cmsg);
1194 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1195 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1196 cmsg->cmsg_type == SO_TIMESTAMP &&
1197 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1198 tv = (struct timeval*) CMSG_DATA(cmsg);
1199 else if (cmsg->cmsg_level == SOL_SOCKET &&
1200 cmsg->cmsg_type == SCM_RIGHTS) {
1201 fds = (int*) CMSG_DATA(cmsg);
1202 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1203 }
1204 }
1205
1206 /* And a trailing NUL, just in case */
1207 s->buffer[n] = 0;
1208
1209 if (fd == s->syslog_fd) {
1210 if (n > 0 && n_fds == 0)
1211 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1212 else if (n_fds > 0)
1213 log_warning("Got file descriptors via syslog socket. Ignoring.");
1214
1215 } else if (fd == s->native_fd) {
1216 if (n > 0 && n_fds == 0)
1217 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1218 else if (n == 0 && n_fds == 1)
1219 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1220 else if (n_fds > 0)
1221 log_warning("Got too many file descriptors via native socket. Ignoring.");
1222
1223 } else {
1224 assert(fd == s->audit_fd);
1225
1226 if (n > 0 && n_fds == 0)
1227 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1228 else if (n_fds > 0)
1229 log_warning("Got file descriptors via audit socket. Ignoring.");
1230 }
1231
1232 close_many(fds, n_fds);
1233 return 0;
1234 }
1235
1236 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1237 Server *s = userdata;
1238
1239 assert(s);
1240
1241 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
1242
1243 server_flush_to_var(s);
1244 server_sync(s);
1245 server_vacuum(s, false, false);
1246
1247 (void) touch("/run/systemd/journal/flushed");
1248
1249 return 0;
1250 }
1251
1252 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1253 Server *s = userdata;
1254
1255 assert(s);
1256
1257 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1258 server_rotate(s);
1259 server_vacuum(s, true, true);
1260
1261 return 0;
1262 }
1263
1264 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1265 Server *s = userdata;
1266
1267 assert(s);
1268
1269 log_received_signal(LOG_INFO, si);
1270
1271 sd_event_exit(s->event, 0);
1272 return 0;
1273 }
1274
1275 static int setup_signals(Server *s) {
1276 int r;
1277
1278 assert(s);
1279
1280 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1) >= 0);
1281
1282 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1283 if (r < 0)
1284 return r;
1285
1286 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1287 if (r < 0)
1288 return r;
1289
1290 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1291 if (r < 0)
1292 return r;
1293
1294 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1295 if (r < 0)
1296 return r;
1297
1298 return 0;
1299 }
1300
1301 static int server_parse_proc_cmdline(Server *s) {
1302 _cleanup_free_ char *line = NULL;
1303 const char *p;
1304 int r;
1305
1306 r = proc_cmdline(&line);
1307 if (r < 0) {
1308 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1309 return 0;
1310 }
1311
1312 p = line;
1313 for(;;) {
1314 _cleanup_free_ char *word;
1315
1316 r = extract_first_word(&p, &word, NULL, 0);
1317 if (r < 0)
1318 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1319
1320 if (r == 0)
1321 break;
1322
1323 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1324 r = parse_boolean(word + 35);
1325 if (r < 0)
1326 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1327 else
1328 s->forward_to_syslog = r;
1329 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1330 r = parse_boolean(word + 33);
1331 if (r < 0)
1332 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1333 else
1334 s->forward_to_kmsg = r;
1335 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1336 r = parse_boolean(word + 36);
1337 if (r < 0)
1338 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1339 else
1340 s->forward_to_console = r;
1341 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1342 r = parse_boolean(word + 33);
1343 if (r < 0)
1344 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1345 else
1346 s->forward_to_wall = r;
1347 } else if (startswith(word, "systemd.journald"))
1348 log_warning("Invalid systemd.journald parameter. Ignoring.");
1349 }
1350
1351 /* do not warn about state here, since probably systemd already did */
1352 return 0;
1353 }
1354
1355 static int server_parse_config_file(Server *s) {
1356 assert(s);
1357
1358 return config_parse_many("/etc/systemd/journald.conf",
1359 CONF_DIRS_NULSTR("systemd/journald.conf"),
1360 "Journal\0",
1361 config_item_perf_lookup, journald_gperf_lookup,
1362 false, s);
1363 }
1364
1365 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1366 Server *s = userdata;
1367
1368 assert(s);
1369
1370 server_sync(s);
1371 return 0;
1372 }
1373
1374 int server_schedule_sync(Server *s, int priority) {
1375 int r;
1376
1377 assert(s);
1378
1379 if (priority <= LOG_CRIT) {
1380 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1381 server_sync(s);
1382 return 0;
1383 }
1384
1385 if (s->sync_scheduled)
1386 return 0;
1387
1388 if (s->sync_interval_usec > 0) {
1389 usec_t when;
1390
1391 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1392 if (r < 0)
1393 return r;
1394
1395 when += s->sync_interval_usec;
1396
1397 if (!s->sync_event_source) {
1398 r = sd_event_add_time(
1399 s->event,
1400 &s->sync_event_source,
1401 CLOCK_MONOTONIC,
1402 when, 0,
1403 server_dispatch_sync, s);
1404 if (r < 0)
1405 return r;
1406
1407 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1408 } else {
1409 r = sd_event_source_set_time(s->sync_event_source, when);
1410 if (r < 0)
1411 return r;
1412
1413 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1414 }
1415 if (r < 0)
1416 return r;
1417
1418 s->sync_scheduled = true;
1419 }
1420
1421 return 0;
1422 }
1423
1424 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1425 Server *s = userdata;
1426
1427 assert(s);
1428
1429 server_cache_hostname(s);
1430 return 0;
1431 }
1432
1433 static int server_open_hostname(Server *s) {
1434 int r;
1435
1436 assert(s);
1437
1438 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1439 if (s->hostname_fd < 0)
1440 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1441
1442 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1443 if (r < 0) {
1444 /* kernels prior to 3.2 don't support polling this file. Ignore
1445 * the failure. */
1446 if (r == -EPERM) {
1447 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1448 s->hostname_fd = safe_close(s->hostname_fd);
1449 return 0;
1450 }
1451
1452 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1453 }
1454
1455 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1456 if (r < 0)
1457 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1458
1459 return 0;
1460 }
1461
1462 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1463 Server *s = userdata;
1464 int r;
1465
1466 assert(s);
1467 assert(s->notify_event_source == es);
1468 assert(s->notify_fd == fd);
1469
1470 if (revents != EPOLLOUT) {
1471 log_error("Invalid events on notify file descriptor.");
1472 return -EINVAL;
1473 }
1474
1475 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1476 * message on it. Either it's the wtachdog event, the initial
1477 * READY=1 event or an stdout stream event. If there's nothing
1478 * to write anymore, turn our event source off. The next time
1479 * there's something to send it will be turned on again. */
1480
1481 if (!s->sent_notify_ready) {
1482 static const char p[] =
1483 "READY=1\n"
1484 "STATUS=Processing requests...";
1485 ssize_t l;
1486
1487 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1488 if (l < 0) {
1489 if (errno == EAGAIN)
1490 return 0;
1491
1492 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1493 }
1494
1495 s->sent_notify_ready = true;
1496 log_debug("Sent READY=1 notification.");
1497
1498 } else if (s->send_watchdog) {
1499
1500 static const char p[] =
1501 "WATCHDOG=1";
1502
1503 ssize_t l;
1504
1505 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1506 if (l < 0) {
1507 if (errno == EAGAIN)
1508 return 0;
1509
1510 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1511 }
1512
1513 s->send_watchdog = false;
1514 log_debug("Sent WATCHDOG=1 notification.");
1515
1516 } else if (s->stdout_streams_notify_queue)
1517 /* Dispatch one stream notification event */
1518 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1519
1520 /* Leave us enabled if there's still more to to do. */
1521 if (s->send_watchdog || s->stdout_streams_notify_queue)
1522 return 0;
1523
1524 /* There was nothing to do anymore, let's turn ourselves off. */
1525 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1526 if (r < 0)
1527 return log_error_errno(r, "Failed to turn off notify event source: %m");
1528
1529 return 0;
1530 }
1531
1532 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1533 Server *s = userdata;
1534 int r;
1535
1536 assert(s);
1537
1538 s->send_watchdog = true;
1539
1540 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1541 if (r < 0)
1542 log_warning_errno(r, "Failed to turn on notify event source: %m");
1543
1544 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1545 if (r < 0)
1546 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1547
1548 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1549 if (r < 0)
1550 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1551
1552 return 0;
1553 }
1554
1555 static int server_connect_notify(Server *s) {
1556 union sockaddr_union sa = {
1557 .un.sun_family = AF_UNIX,
1558 };
1559 const char *e;
1560 int r;
1561
1562 assert(s);
1563 assert(s->notify_fd < 0);
1564 assert(!s->notify_event_source);
1565
1566 /*
1567 So here's the problem: we'd like to send notification
1568 messages to PID 1, but we cannot do that via sd_notify(),
1569 since that's synchronous, and we might end up blocking on
1570 it. Specifically: given that PID 1 might block on
1571 dbus-daemon during IPC, and dbus-daemon is logging to us,
1572 and might hence block on us, we might end up in a deadlock
1573 if we block on sending PID 1 notification messages -- by
1574 generating a full blocking circle. To avoid this, let's
1575 create a non-blocking socket, and connect it to the
1576 notification socket, and then wait for POLLOUT before we
1577 send anything. This should efficiently avoid any deadlocks,
1578 as we'll never block on PID 1, hence PID 1 can safely block
1579 on dbus-daemon which can safely block on us again.
1580
1581 Don't think that this issue is real? It is, see:
1582 https://github.com/systemd/systemd/issues/1505
1583 */
1584
1585 e = getenv("NOTIFY_SOCKET");
1586 if (!e)
1587 return 0;
1588
1589 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1590 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1591 return -EINVAL;
1592 }
1593
1594 if (strlen(e) > sizeof(sa.un.sun_path)) {
1595 log_error("NOTIFY_SOCKET path too long: %s", e);
1596 return -EINVAL;
1597 }
1598
1599 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1600 if (s->notify_fd < 0)
1601 return log_error_errno(errno, "Failed to create notify socket: %m");
1602
1603 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1604
1605 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1606 if (sa.un.sun_path[0] == '@')
1607 sa.un.sun_path[0] = 0;
1608
1609 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1610 if (r < 0)
1611 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1612
1613 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1614 if (r < 0)
1615 return log_error_errno(r, "Failed to watch notification socket: %m");
1616
1617 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1618 s->send_watchdog = true;
1619
1620 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec*3/4, dispatch_watchdog, s);
1621 if (r < 0)
1622 return log_error_errno(r, "Failed to add watchdog time event: %m");
1623 }
1624
1625 /* This should fire pretty soon, which we'll use to send the
1626 * READY=1 event. */
1627
1628 return 0;
1629 }
1630
1631 int server_init(Server *s) {
1632 _cleanup_fdset_free_ FDSet *fds = NULL;
1633 int n, r, fd;
1634 bool no_sockets;
1635
1636 assert(s);
1637
1638 zero(*s);
1639 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1640 s->compress = true;
1641 s->seal = true;
1642
1643 s->watchdog_usec = USEC_INFINITY;
1644
1645 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1646 s->sync_scheduled = false;
1647
1648 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1649 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1650
1651 s->forward_to_wall = true;
1652
1653 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1654
1655 s->max_level_store = LOG_DEBUG;
1656 s->max_level_syslog = LOG_DEBUG;
1657 s->max_level_kmsg = LOG_NOTICE;
1658 s->max_level_console = LOG_INFO;
1659 s->max_level_wall = LOG_EMERG;
1660
1661 journal_reset_metrics(&s->system_metrics);
1662 journal_reset_metrics(&s->runtime_metrics);
1663
1664 server_parse_config_file(s);
1665 server_parse_proc_cmdline(s);
1666
1667 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1668 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1669 s->rate_limit_interval, s->rate_limit_burst);
1670 s->rate_limit_interval = s->rate_limit_burst = 0;
1671 }
1672
1673 (void) mkdir_p("/run/systemd/journal", 0755);
1674
1675 s->user_journals = ordered_hashmap_new(NULL);
1676 if (!s->user_journals)
1677 return log_oom();
1678
1679 s->mmap = mmap_cache_new();
1680 if (!s->mmap)
1681 return log_oom();
1682
1683 r = sd_event_default(&s->event);
1684 if (r < 0)
1685 return log_error_errno(r, "Failed to create event loop: %m");
1686
1687 n = sd_listen_fds(true);
1688 if (n < 0)
1689 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1690
1691 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1692
1693 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1694
1695 if (s->native_fd >= 0) {
1696 log_error("Too many native sockets passed.");
1697 return -EINVAL;
1698 }
1699
1700 s->native_fd = fd;
1701
1702 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1703
1704 if (s->stdout_fd >= 0) {
1705 log_error("Too many stdout sockets passed.");
1706 return -EINVAL;
1707 }
1708
1709 s->stdout_fd = fd;
1710
1711 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1712 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1713
1714 if (s->syslog_fd >= 0) {
1715 log_error("Too many /dev/log sockets passed.");
1716 return -EINVAL;
1717 }
1718
1719 s->syslog_fd = fd;
1720
1721 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1722
1723 if (s->audit_fd >= 0) {
1724 log_error("Too many audit sockets passed.");
1725 return -EINVAL;
1726 }
1727
1728 s->audit_fd = fd;
1729
1730 } else {
1731
1732 if (!fds) {
1733 fds = fdset_new();
1734 if (!fds)
1735 return log_oom();
1736 }
1737
1738 r = fdset_put(fds, fd);
1739 if (r < 0)
1740 return log_oom();
1741 }
1742 }
1743
1744 /* Try to restore streams, but don't bother if this fails */
1745 (void) server_restore_streams(s, fds);
1746
1747 if (fdset_size(fds) > 0) {
1748 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1749 fds = fdset_free(fds);
1750 }
1751
1752 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1753
1754 /* always open stdout, syslog, native, and kmsg sockets */
1755
1756 /* systemd-journald.socket: /run/systemd/journal/stdout */
1757 r = server_open_stdout_socket(s);
1758 if (r < 0)
1759 return r;
1760
1761 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1762 r = server_open_syslog_socket(s);
1763 if (r < 0)
1764 return r;
1765
1766 /* systemd-journald.socket: /run/systemd/journal/socket */
1767 r = server_open_native_socket(s);
1768 if (r < 0)
1769 return r;
1770
1771 /* /dev/ksmg */
1772 r = server_open_dev_kmsg(s);
1773 if (r < 0)
1774 return r;
1775
1776 /* Unless we got *some* sockets and not audit, open audit socket */
1777 if (s->audit_fd >= 0 || no_sockets) {
1778 r = server_open_audit(s);
1779 if (r < 0)
1780 return r;
1781 }
1782
1783 r = server_open_kernel_seqnum(s);
1784 if (r < 0)
1785 return r;
1786
1787 r = server_open_hostname(s);
1788 if (r < 0)
1789 return r;
1790
1791 r = setup_signals(s);
1792 if (r < 0)
1793 return r;
1794
1795 s->udev = udev_new();
1796 if (!s->udev)
1797 return -ENOMEM;
1798
1799 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1800 if (!s->rate_limit)
1801 return -ENOMEM;
1802
1803 r = cg_get_root_path(&s->cgroup_root);
1804 if (r < 0)
1805 return r;
1806
1807 server_cache_hostname(s);
1808 server_cache_boot_id(s);
1809 server_cache_machine_id(s);
1810
1811 (void) server_connect_notify(s);
1812
1813 return system_journal_open(s, false);
1814 }
1815
1816 void server_maybe_append_tags(Server *s) {
1817 #ifdef HAVE_GCRYPT
1818 JournalFile *f;
1819 Iterator i;
1820 usec_t n;
1821
1822 n = now(CLOCK_REALTIME);
1823
1824 if (s->system_journal)
1825 journal_file_maybe_append_tag(s->system_journal, n);
1826
1827 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1828 journal_file_maybe_append_tag(f, n);
1829 #endif
1830 }
1831
1832 void server_done(Server *s) {
1833 JournalFile *f;
1834 assert(s);
1835
1836 while (s->stdout_streams)
1837 stdout_stream_free(s->stdout_streams);
1838
1839 if (s->system_journal)
1840 journal_file_close(s->system_journal);
1841
1842 if (s->runtime_journal)
1843 journal_file_close(s->runtime_journal);
1844
1845 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1846 journal_file_close(f);
1847
1848 ordered_hashmap_free(s->user_journals);
1849
1850 sd_event_source_unref(s->syslog_event_source);
1851 sd_event_source_unref(s->native_event_source);
1852 sd_event_source_unref(s->stdout_event_source);
1853 sd_event_source_unref(s->dev_kmsg_event_source);
1854 sd_event_source_unref(s->audit_event_source);
1855 sd_event_source_unref(s->sync_event_source);
1856 sd_event_source_unref(s->sigusr1_event_source);
1857 sd_event_source_unref(s->sigusr2_event_source);
1858 sd_event_source_unref(s->sigterm_event_source);
1859 sd_event_source_unref(s->sigint_event_source);
1860 sd_event_source_unref(s->hostname_event_source);
1861 sd_event_source_unref(s->notify_event_source);
1862 sd_event_source_unref(s->watchdog_event_source);
1863 sd_event_unref(s->event);
1864
1865 safe_close(s->syslog_fd);
1866 safe_close(s->native_fd);
1867 safe_close(s->stdout_fd);
1868 safe_close(s->dev_kmsg_fd);
1869 safe_close(s->audit_fd);
1870 safe_close(s->hostname_fd);
1871 safe_close(s->notify_fd);
1872
1873 if (s->rate_limit)
1874 journal_rate_limit_free(s->rate_limit);
1875
1876 if (s->kernel_seqnum)
1877 munmap(s->kernel_seqnum, sizeof(uint64_t));
1878
1879 free(s->buffer);
1880 free(s->tty_path);
1881 free(s->cgroup_root);
1882 free(s->hostname_field);
1883
1884 if (s->mmap)
1885 mmap_cache_unref(s->mmap);
1886
1887 udev_unref(s->udev);
1888 }
1889
1890 static const char* const storage_table[_STORAGE_MAX] = {
1891 [STORAGE_AUTO] = "auto",
1892 [STORAGE_VOLATILE] = "volatile",
1893 [STORAGE_PERSISTENT] = "persistent",
1894 [STORAGE_NONE] = "none"
1895 };
1896
1897 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1898 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1899
1900 static const char* const split_mode_table[_SPLIT_MAX] = {
1901 [SPLIT_LOGIN] = "login",
1902 [SPLIT_UID] = "uid",
1903 [SPLIT_NONE] = "none",
1904 };
1905
1906 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1907 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");