]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
Merge pull request #2369 from zonque/resolved
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "fileio.h"
45 #include "formats-util.h"
46 #include "fs-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "io-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
73
74 #define USER_JOURNALS_MAX 1024
75
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
80
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
82
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
84
85 static int determine_space_for(
86 Server *s,
87 JournalMetrics *metrics,
88 const char *path,
89 const char *name,
90 bool verbose,
91 bool patch_min_use,
92 uint64_t *available,
93 uint64_t *limit) {
94
95 uint64_t sum = 0, ss_avail, avail;
96 _cleanup_closedir_ DIR *d = NULL;
97 struct dirent *de;
98 struct statvfs ss;
99 const char *p;
100 usec_t ts;
101
102 assert(s);
103 assert(metrics);
104 assert(path);
105 assert(name);
106
107 ts = now(CLOCK_MONOTONIC);
108
109 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
110
111 if (available)
112 *available = s->cached_space_available;
113 if (limit)
114 *limit = s->cached_space_limit;
115
116 return 0;
117 }
118
119 p = strjoina(path, SERVER_MACHINE_ID(s));
120 d = opendir(p);
121 if (!d)
122 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
123
124 if (fstatvfs(dirfd(d), &ss) < 0)
125 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
126
127 FOREACH_DIRENT_ALL(de, d, break) {
128 struct stat st;
129
130 if (!endswith(de->d_name, ".journal") &&
131 !endswith(de->d_name, ".journal~"))
132 continue;
133
134 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
135 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
136 continue;
137 }
138
139 if (!S_ISREG(st.st_mode))
140 continue;
141
142 sum += (uint64_t) st.st_blocks * 512UL;
143 }
144
145 /* If request, then let's bump the min_use limit to the
146 * current usage on disk. We do this when starting up and
147 * first opening the journal files. This way sudden spikes in
148 * disk usage will not cause journald to vacuum files without
149 * bounds. Note that this means that only a restart of
150 * journald will make it reset this value. */
151
152 if (patch_min_use)
153 metrics->min_use = MAX(metrics->min_use, sum);
154
155 ss_avail = ss.f_bsize * ss.f_bavail;
156 avail = LESS_BY(ss_avail, metrics->keep_free);
157
158 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
159 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
160 s->cached_space_timestamp = ts;
161
162 if (verbose) {
163 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
164 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
165
166 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
167 "%s (%s) is currently using %s.\n"
168 "Maximum allowed usage is set to %s.\n"
169 "Leaving at least %s free (of currently available %s of space).\n"
170 "Enforced usage limit is thus %s, of which %s are still available.",
171 name, path,
172 format_bytes(fb1, sizeof(fb1), sum),
173 format_bytes(fb2, sizeof(fb2), metrics->max_use),
174 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
175 format_bytes(fb4, sizeof(fb4), ss_avail),
176 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
177 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
178 }
179
180 if (available)
181 *available = s->cached_space_available;
182 if (limit)
183 *limit = s->cached_space_limit;
184
185 return 1;
186 }
187
188 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
189 JournalMetrics *metrics;
190 const char *path, *name;
191
192 assert(s);
193
194 if (s->system_journal) {
195 path = "/var/log/journal/";
196 metrics = &s->system_metrics;
197 name = "System journal";
198 } else {
199 path = "/run/log/journal/";
200 metrics = &s->runtime_metrics;
201 name = "Runtime journal";
202 }
203
204 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
205 }
206
207 static void server_add_acls(JournalFile *f, uid_t uid) {
208 #ifdef HAVE_ACL
209 int r;
210 #endif
211 assert(f);
212
213 #ifdef HAVE_ACL
214 if (uid <= SYSTEM_UID_MAX)
215 return;
216
217 r = add_acls_for_user(f->fd, uid);
218 if (r < 0)
219 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
220 #endif
221 }
222
223 static JournalFile* find_journal(Server *s, uid_t uid) {
224 _cleanup_free_ char *p = NULL;
225 int r;
226 JournalFile *f;
227 sd_id128_t machine;
228
229 assert(s);
230
231 /* We split up user logs only on /var, not on /run. If the
232 * runtime file is open, we write to it exclusively, in order
233 * to guarantee proper order as soon as we flush /run to
234 * /var and close the runtime file. */
235
236 if (s->runtime_journal)
237 return s->runtime_journal;
238
239 if (uid <= SYSTEM_UID_MAX)
240 return s->system_journal;
241
242 r = sd_id128_get_machine(&machine);
243 if (r < 0)
244 return s->system_journal;
245
246 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
247 if (f)
248 return f;
249
250 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
251 SD_ID128_FORMAT_VAL(machine), uid) < 0)
252 return s->system_journal;
253
254 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
255 /* Too many open? Then let's close one */
256 f = ordered_hashmap_steal_first(s->user_journals);
257 assert(f);
258 journal_file_close(f);
259 }
260
261 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
262 if (r < 0)
263 return s->system_journal;
264
265 server_add_acls(f, uid);
266
267 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
268 if (r < 0) {
269 journal_file_close(f);
270 return s->system_journal;
271 }
272
273 return f;
274 }
275
276 static int do_rotate(
277 Server *s,
278 JournalFile **f,
279 const char* name,
280 bool seal,
281 uint32_t uid) {
282
283 int r;
284 assert(s);
285
286 if (!*f)
287 return -EINVAL;
288
289 r = journal_file_rotate(f, s->compress, seal);
290 if (r < 0)
291 if (*f)
292 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
293 else
294 log_error_errno(r, "Failed to create new %s journal: %m", name);
295 else
296 server_add_acls(*f, uid);
297
298 return r;
299 }
300
301 void server_rotate(Server *s) {
302 JournalFile *f;
303 void *k;
304 Iterator i;
305 int r;
306
307 log_debug("Rotating...");
308
309 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
310 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
311
312 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
313 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
314 if (r >= 0)
315 ordered_hashmap_replace(s->user_journals, k, f);
316 else if (!f)
317 /* Old file has been closed and deallocated */
318 ordered_hashmap_remove(s->user_journals, k);
319 }
320 }
321
322 void server_sync(Server *s) {
323 JournalFile *f;
324 Iterator i;
325 int r;
326
327 if (s->system_journal) {
328 r = journal_file_set_offline(s->system_journal);
329 if (r < 0)
330 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
331 }
332
333 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
334 r = journal_file_set_offline(f);
335 if (r < 0)
336 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
337 }
338
339 if (s->sync_event_source) {
340 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
341 if (r < 0)
342 log_error_errno(r, "Failed to disable sync timer source: %m");
343 }
344
345 s->sync_scheduled = false;
346 }
347
348 static void do_vacuum(
349 Server *s,
350 JournalFile *f,
351 JournalMetrics *metrics,
352 const char *path,
353 const char *name,
354 bool verbose,
355 bool patch_min_use) {
356
357 const char *p;
358 uint64_t limit;
359 int r;
360
361 assert(s);
362 assert(metrics);
363 assert(path);
364 assert(name);
365
366 if (!f)
367 return;
368
369 p = strjoina(path, SERVER_MACHINE_ID(s));
370
371 limit = metrics->max_use;
372 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
373
374 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
375 if (r < 0 && r != -ENOENT)
376 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
377 }
378
379 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
380 assert(s);
381
382 log_debug("Vacuuming...");
383
384 s->oldest_file_usec = 0;
385
386 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
387 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
388
389 s->cached_space_limit = 0;
390 s->cached_space_available = 0;
391 s->cached_space_timestamp = 0;
392
393 return 0;
394 }
395
396 static void server_cache_machine_id(Server *s) {
397 sd_id128_t id;
398 int r;
399
400 assert(s);
401
402 r = sd_id128_get_machine(&id);
403 if (r < 0)
404 return;
405
406 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
407 }
408
409 static void server_cache_boot_id(Server *s) {
410 sd_id128_t id;
411 int r;
412
413 assert(s);
414
415 r = sd_id128_get_boot(&id);
416 if (r < 0)
417 return;
418
419 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
420 }
421
422 static void server_cache_hostname(Server *s) {
423 _cleanup_free_ char *t = NULL;
424 char *x;
425
426 assert(s);
427
428 t = gethostname_malloc();
429 if (!t)
430 return;
431
432 x = strappend("_HOSTNAME=", t);
433 if (!x)
434 return;
435
436 free(s->hostname_field);
437 s->hostname_field = x;
438 }
439
440 static bool shall_try_append_again(JournalFile *f, int r) {
441
442 /* -E2BIG Hit configured limit
443 -EFBIG Hit fs limit
444 -EDQUOT Quota limit hit
445 -ENOSPC Disk full
446 -EIO I/O error of some kind (mmap)
447 -EHOSTDOWN Other machine
448 -EBUSY Unclean shutdown
449 -EPROTONOSUPPORT Unsupported feature
450 -EBADMSG Corrupted
451 -ENODATA Truncated
452 -ESHUTDOWN Already archived
453 -EIDRM Journal file has been deleted */
454
455 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
456 log_debug("%s: Allocation limit reached, rotating.", f->path);
457 else if (r == -EHOSTDOWN)
458 log_info("%s: Journal file from other machine, rotating.", f->path);
459 else if (r == -EBUSY)
460 log_info("%s: Unclean shutdown, rotating.", f->path);
461 else if (r == -EPROTONOSUPPORT)
462 log_info("%s: Unsupported feature, rotating.", f->path);
463 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
464 log_warning("%s: Journal file corrupted, rotating.", f->path);
465 else if (r == -EIO)
466 log_warning("%s: IO error, rotating.", f->path);
467 else if (r == -EIDRM)
468 log_warning("%s: Journal file has been deleted, rotating.", f->path);
469 else
470 return false;
471
472 return true;
473 }
474
475 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
476 JournalFile *f;
477 bool vacuumed = false;
478 int r;
479
480 assert(s);
481 assert(iovec);
482 assert(n > 0);
483
484 f = find_journal(s, uid);
485 if (!f)
486 return;
487
488 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
489 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
490 server_rotate(s);
491 server_vacuum(s, false, false);
492 vacuumed = true;
493
494 f = find_journal(s, uid);
495 if (!f)
496 return;
497 }
498
499 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
500 if (r >= 0) {
501 server_schedule_sync(s, priority);
502 return;
503 }
504
505 if (vacuumed || !shall_try_append_again(f, r)) {
506 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
507 return;
508 }
509
510 server_rotate(s);
511 server_vacuum(s, false, false);
512
513 f = find_journal(s, uid);
514 if (!f)
515 return;
516
517 log_debug("Retrying write.");
518 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
519 if (r < 0)
520 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
521 else
522 server_schedule_sync(s, priority);
523 }
524
525 static void dispatch_message_real(
526 Server *s,
527 struct iovec *iovec, unsigned n, unsigned m,
528 const struct ucred *ucred,
529 const struct timeval *tv,
530 const char *label, size_t label_len,
531 const char *unit_id,
532 int priority,
533 pid_t object_pid) {
534
535 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
536 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
537 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
538 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
539 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
540 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
541 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
542 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
543 uid_t object_uid;
544 gid_t object_gid;
545 char *x;
546 int r;
547 char *t, *c;
548 uid_t realuid = 0, owner = 0, journal_uid;
549 bool owner_valid = false;
550 #ifdef HAVE_AUDIT
551 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
552 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
553 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
554 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
555
556 uint32_t audit;
557 uid_t loginuid;
558 #endif
559
560 assert(s);
561 assert(iovec);
562 assert(n > 0);
563 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
564
565 if (ucred) {
566 realuid = ucred->uid;
567
568 sprintf(pid, "_PID="PID_FMT, ucred->pid);
569 IOVEC_SET_STRING(iovec[n++], pid);
570
571 sprintf(uid, "_UID="UID_FMT, ucred->uid);
572 IOVEC_SET_STRING(iovec[n++], uid);
573
574 sprintf(gid, "_GID="GID_FMT, ucred->gid);
575 IOVEC_SET_STRING(iovec[n++], gid);
576
577 r = get_process_comm(ucred->pid, &t);
578 if (r >= 0) {
579 x = strjoina("_COMM=", t);
580 free(t);
581 IOVEC_SET_STRING(iovec[n++], x);
582 }
583
584 r = get_process_exe(ucred->pid, &t);
585 if (r >= 0) {
586 x = strjoina("_EXE=", t);
587 free(t);
588 IOVEC_SET_STRING(iovec[n++], x);
589 }
590
591 r = get_process_cmdline(ucred->pid, 0, false, &t);
592 if (r >= 0) {
593 x = strjoina("_CMDLINE=", t);
594 free(t);
595 IOVEC_SET_STRING(iovec[n++], x);
596 }
597
598 r = get_process_capeff(ucred->pid, &t);
599 if (r >= 0) {
600 x = strjoina("_CAP_EFFECTIVE=", t);
601 free(t);
602 IOVEC_SET_STRING(iovec[n++], x);
603 }
604
605 #ifdef HAVE_AUDIT
606 r = audit_session_from_pid(ucred->pid, &audit);
607 if (r >= 0) {
608 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
609 IOVEC_SET_STRING(iovec[n++], audit_session);
610 }
611
612 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
613 if (r >= 0) {
614 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
615 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
616 }
617 #endif
618
619 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
620 if (r >= 0) {
621 char *session = NULL;
622
623 x = strjoina("_SYSTEMD_CGROUP=", c);
624 IOVEC_SET_STRING(iovec[n++], x);
625
626 r = cg_path_get_session(c, &t);
627 if (r >= 0) {
628 session = strjoina("_SYSTEMD_SESSION=", t);
629 free(t);
630 IOVEC_SET_STRING(iovec[n++], session);
631 }
632
633 if (cg_path_get_owner_uid(c, &owner) >= 0) {
634 owner_valid = true;
635
636 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
637 IOVEC_SET_STRING(iovec[n++], owner_uid);
638 }
639
640 if (cg_path_get_unit(c, &t) >= 0) {
641 x = strjoina("_SYSTEMD_UNIT=", t);
642 free(t);
643 IOVEC_SET_STRING(iovec[n++], x);
644 } else if (unit_id && !session) {
645 x = strjoina("_SYSTEMD_UNIT=", unit_id);
646 IOVEC_SET_STRING(iovec[n++], x);
647 }
648
649 if (cg_path_get_user_unit(c, &t) >= 0) {
650 x = strjoina("_SYSTEMD_USER_UNIT=", t);
651 free(t);
652 IOVEC_SET_STRING(iovec[n++], x);
653 } else if (unit_id && session) {
654 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
655 IOVEC_SET_STRING(iovec[n++], x);
656 }
657
658 if (cg_path_get_slice(c, &t) >= 0) {
659 x = strjoina("_SYSTEMD_SLICE=", t);
660 free(t);
661 IOVEC_SET_STRING(iovec[n++], x);
662 }
663
664 free(c);
665 } else if (unit_id) {
666 x = strjoina("_SYSTEMD_UNIT=", unit_id);
667 IOVEC_SET_STRING(iovec[n++], x);
668 }
669
670 #ifdef HAVE_SELINUX
671 if (mac_selinux_have()) {
672 if (label) {
673 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
674
675 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
676 IOVEC_SET_STRING(iovec[n++], x);
677 } else {
678 security_context_t con;
679
680 if (getpidcon(ucred->pid, &con) >= 0) {
681 x = strjoina("_SELINUX_CONTEXT=", con);
682
683 freecon(con);
684 IOVEC_SET_STRING(iovec[n++], x);
685 }
686 }
687 }
688 #endif
689 }
690 assert(n <= m);
691
692 if (object_pid) {
693 r = get_process_uid(object_pid, &object_uid);
694 if (r >= 0) {
695 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
696 IOVEC_SET_STRING(iovec[n++], o_uid);
697 }
698
699 r = get_process_gid(object_pid, &object_gid);
700 if (r >= 0) {
701 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
702 IOVEC_SET_STRING(iovec[n++], o_gid);
703 }
704
705 r = get_process_comm(object_pid, &t);
706 if (r >= 0) {
707 x = strjoina("OBJECT_COMM=", t);
708 free(t);
709 IOVEC_SET_STRING(iovec[n++], x);
710 }
711
712 r = get_process_exe(object_pid, &t);
713 if (r >= 0) {
714 x = strjoina("OBJECT_EXE=", t);
715 free(t);
716 IOVEC_SET_STRING(iovec[n++], x);
717 }
718
719 r = get_process_cmdline(object_pid, 0, false, &t);
720 if (r >= 0) {
721 x = strjoina("OBJECT_CMDLINE=", t);
722 free(t);
723 IOVEC_SET_STRING(iovec[n++], x);
724 }
725
726 #ifdef HAVE_AUDIT
727 r = audit_session_from_pid(object_pid, &audit);
728 if (r >= 0) {
729 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
730 IOVEC_SET_STRING(iovec[n++], o_audit_session);
731 }
732
733 r = audit_loginuid_from_pid(object_pid, &loginuid);
734 if (r >= 0) {
735 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
736 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
737 }
738 #endif
739
740 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
741 if (r >= 0) {
742 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
743 IOVEC_SET_STRING(iovec[n++], x);
744
745 r = cg_path_get_session(c, &t);
746 if (r >= 0) {
747 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
748 free(t);
749 IOVEC_SET_STRING(iovec[n++], x);
750 }
751
752 if (cg_path_get_owner_uid(c, &owner) >= 0) {
753 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
754 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
755 }
756
757 if (cg_path_get_unit(c, &t) >= 0) {
758 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
759 free(t);
760 IOVEC_SET_STRING(iovec[n++], x);
761 }
762
763 if (cg_path_get_user_unit(c, &t) >= 0) {
764 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
765 free(t);
766 IOVEC_SET_STRING(iovec[n++], x);
767 }
768
769 free(c);
770 }
771 }
772 assert(n <= m);
773
774 if (tv) {
775 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
776 IOVEC_SET_STRING(iovec[n++], source_time);
777 }
778
779 /* Note that strictly speaking storing the boot id here is
780 * redundant since the entry includes this in-line
781 * anyway. However, we need this indexed, too. */
782 if (!isempty(s->boot_id_field))
783 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
784
785 if (!isempty(s->machine_id_field))
786 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
787
788 if (!isempty(s->hostname_field))
789 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
790
791 assert(n <= m);
792
793 if (s->split_mode == SPLIT_UID && realuid > 0)
794 /* Split up strictly by any UID */
795 journal_uid = realuid;
796 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
797 /* Split up by login UIDs. We do this only if the
798 * realuid is not root, in order not to accidentally
799 * leak privileged information to the user that is
800 * logged by a privileged process that is part of an
801 * unprivileged session. */
802 journal_uid = owner;
803 else
804 journal_uid = 0;
805
806 write_to_journal(s, journal_uid, iovec, n, priority);
807 }
808
809 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
810 char mid[11 + 32 + 1];
811 char buffer[16 + LINE_MAX + 1];
812 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
813 int n = 0;
814 va_list ap;
815 struct ucred ucred = {};
816
817 assert(s);
818 assert(format);
819
820 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
821 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
822
823 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
824 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
825
826 memcpy(buffer, "MESSAGE=", 8);
827 va_start(ap, format);
828 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
829 va_end(ap);
830 IOVEC_SET_STRING(iovec[n++], buffer);
831
832 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
833 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
834 IOVEC_SET_STRING(iovec[n++], mid);
835 }
836
837 ucred.pid = getpid();
838 ucred.uid = getuid();
839 ucred.gid = getgid();
840
841 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
842 }
843
844 void server_dispatch_message(
845 Server *s,
846 struct iovec *iovec, unsigned n, unsigned m,
847 const struct ucred *ucred,
848 const struct timeval *tv,
849 const char *label, size_t label_len,
850 const char *unit_id,
851 int priority,
852 pid_t object_pid) {
853
854 int rl, r;
855 _cleanup_free_ char *path = NULL;
856 uint64_t available = 0;
857 char *c;
858
859 assert(s);
860 assert(iovec || n == 0);
861
862 if (n == 0)
863 return;
864
865 if (LOG_PRI(priority) > s->max_level_store)
866 return;
867
868 /* Stop early in case the information will not be stored
869 * in a journal. */
870 if (s->storage == STORAGE_NONE)
871 return;
872
873 if (!ucred)
874 goto finish;
875
876 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
877 if (r < 0)
878 goto finish;
879
880 /* example: /user/lennart/3/foobar
881 * /system/dbus.service/foobar
882 *
883 * So let's cut of everything past the third /, since that is
884 * where user directories start */
885
886 c = strchr(path, '/');
887 if (c) {
888 c = strchr(c+1, '/');
889 if (c) {
890 c = strchr(c+1, '/');
891 if (c)
892 *c = 0;
893 }
894 }
895
896 (void) determine_space(s, false, false, &available, NULL);
897 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
898 if (rl == 0)
899 return;
900
901 /* Write a suppression message if we suppressed something */
902 if (rl > 1)
903 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
904 "Suppressed %u messages from %s", rl - 1, path);
905
906 finish:
907 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
908 }
909
910
911 static int system_journal_open(Server *s, bool flush_requested) {
912 const char *fn;
913 int r = 0;
914
915 if (!s->system_journal &&
916 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
917 (flush_requested
918 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
919
920 /* If in auto mode: first try to create the machine
921 * path, but not the prefix.
922 *
923 * If in persistent mode: create /var/log/journal and
924 * the machine path */
925
926 if (s->storage == STORAGE_PERSISTENT)
927 (void) mkdir_p("/var/log/journal/", 0755);
928
929 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
930 (void) mkdir(fn, 0755);
931
932 fn = strjoina(fn, "/system.journal");
933 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
934 if (r >= 0) {
935 server_add_acls(s->system_journal, 0);
936 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
937 } else if (r < 0) {
938 if (r != -ENOENT && r != -EROFS)
939 log_warning_errno(r, "Failed to open system journal: %m");
940
941 r = 0;
942 }
943 }
944
945 if (!s->runtime_journal &&
946 (s->storage != STORAGE_NONE)) {
947
948 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
949
950 if (s->system_journal) {
951
952 /* Try to open the runtime journal, but only
953 * if it already exists, so that we can flush
954 * it into the system journal */
955
956 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
957 if (r < 0) {
958 if (r != -ENOENT)
959 log_warning_errno(r, "Failed to open runtime journal: %m");
960
961 r = 0;
962 }
963
964 } else {
965
966 /* OK, we really need the runtime journal, so create
967 * it if necessary. */
968
969 (void) mkdir("/run/log", 0755);
970 (void) mkdir("/run/log/journal", 0755);
971 (void) mkdir_parents(fn, 0750);
972
973 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
974 if (r < 0)
975 return log_error_errno(r, "Failed to open runtime journal: %m");
976 }
977
978 if (s->runtime_journal) {
979 server_add_acls(s->runtime_journal, 0);
980 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
981 }
982 }
983
984 return r;
985 }
986
987 int server_flush_to_var(Server *s) {
988 sd_id128_t machine;
989 sd_journal *j = NULL;
990 char ts[FORMAT_TIMESPAN_MAX];
991 usec_t start;
992 unsigned n = 0;
993 int r;
994
995 assert(s);
996
997 if (s->storage != STORAGE_AUTO &&
998 s->storage != STORAGE_PERSISTENT)
999 return 0;
1000
1001 if (!s->runtime_journal)
1002 return 0;
1003
1004 (void) system_journal_open(s, true);
1005
1006 if (!s->system_journal)
1007 return 0;
1008
1009 log_debug("Flushing to /var...");
1010
1011 start = now(CLOCK_MONOTONIC);
1012
1013 r = sd_id128_get_machine(&machine);
1014 if (r < 0)
1015 return r;
1016
1017 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1018 if (r < 0)
1019 return log_error_errno(r, "Failed to read runtime journal: %m");
1020
1021 sd_journal_set_data_threshold(j, 0);
1022
1023 SD_JOURNAL_FOREACH(j) {
1024 Object *o = NULL;
1025 JournalFile *f;
1026
1027 f = j->current_file;
1028 assert(f && f->current_offset > 0);
1029
1030 n++;
1031
1032 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1033 if (r < 0) {
1034 log_error_errno(r, "Can't read entry: %m");
1035 goto finish;
1036 }
1037
1038 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1039 if (r >= 0)
1040 continue;
1041
1042 if (!shall_try_append_again(s->system_journal, r)) {
1043 log_error_errno(r, "Can't write entry: %m");
1044 goto finish;
1045 }
1046
1047 server_rotate(s);
1048 server_vacuum(s, false, false);
1049
1050 if (!s->system_journal) {
1051 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1052 r = -EIO;
1053 goto finish;
1054 }
1055
1056 log_debug("Retrying write.");
1057 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1058 if (r < 0) {
1059 log_error_errno(r, "Can't write entry: %m");
1060 goto finish;
1061 }
1062 }
1063
1064 r = 0;
1065
1066 finish:
1067 journal_file_post_change(s->system_journal);
1068
1069 s->runtime_journal = journal_file_close(s->runtime_journal);
1070
1071 if (r >= 0)
1072 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1073
1074 sd_journal_close(j);
1075
1076 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1077
1078 return r;
1079 }
1080
1081 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1082 Server *s = userdata;
1083 struct ucred *ucred = NULL;
1084 struct timeval *tv = NULL;
1085 struct cmsghdr *cmsg;
1086 char *label = NULL;
1087 size_t label_len = 0, m;
1088 struct iovec iovec;
1089 ssize_t n;
1090 int *fds = NULL, v = 0;
1091 unsigned n_fds = 0;
1092
1093 union {
1094 struct cmsghdr cmsghdr;
1095
1096 /* We use NAME_MAX space for the SELinux label
1097 * here. The kernel currently enforces no
1098 * limit, but according to suggestions from
1099 * the SELinux people this will change and it
1100 * will probably be identical to NAME_MAX. For
1101 * now we use that, but this should be updated
1102 * one day when the final limit is known. */
1103 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1104 CMSG_SPACE(sizeof(struct timeval)) +
1105 CMSG_SPACE(sizeof(int)) + /* fd */
1106 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1107 } control = {};
1108
1109 union sockaddr_union sa = {};
1110
1111 struct msghdr msghdr = {
1112 .msg_iov = &iovec,
1113 .msg_iovlen = 1,
1114 .msg_control = &control,
1115 .msg_controllen = sizeof(control),
1116 .msg_name = &sa,
1117 .msg_namelen = sizeof(sa),
1118 };
1119
1120 assert(s);
1121 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1122
1123 if (revents != EPOLLIN) {
1124 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1125 return -EIO;
1126 }
1127
1128 /* Try to get the right size, if we can. (Not all
1129 * sockets support SIOCINQ, hence we just try, but
1130 * don't rely on it. */
1131 (void) ioctl(fd, SIOCINQ, &v);
1132
1133 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1134 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1135 (size_t) LINE_MAX,
1136 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1137
1138 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1139 return log_oom();
1140
1141 iovec.iov_base = s->buffer;
1142 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1143
1144 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1145 if (n < 0) {
1146 if (errno == EINTR || errno == EAGAIN)
1147 return 0;
1148
1149 return log_error_errno(errno, "recvmsg() failed: %m");
1150 }
1151
1152 CMSG_FOREACH(cmsg, &msghdr) {
1153
1154 if (cmsg->cmsg_level == SOL_SOCKET &&
1155 cmsg->cmsg_type == SCM_CREDENTIALS &&
1156 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1157 ucred = (struct ucred*) CMSG_DATA(cmsg);
1158 else if (cmsg->cmsg_level == SOL_SOCKET &&
1159 cmsg->cmsg_type == SCM_SECURITY) {
1160 label = (char*) CMSG_DATA(cmsg);
1161 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1162 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1163 cmsg->cmsg_type == SO_TIMESTAMP &&
1164 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1165 tv = (struct timeval*) CMSG_DATA(cmsg);
1166 else if (cmsg->cmsg_level == SOL_SOCKET &&
1167 cmsg->cmsg_type == SCM_RIGHTS) {
1168 fds = (int*) CMSG_DATA(cmsg);
1169 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1170 }
1171 }
1172
1173 /* And a trailing NUL, just in case */
1174 s->buffer[n] = 0;
1175
1176 if (fd == s->syslog_fd) {
1177 if (n > 0 && n_fds == 0)
1178 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1179 else if (n_fds > 0)
1180 log_warning("Got file descriptors via syslog socket. Ignoring.");
1181
1182 } else if (fd == s->native_fd) {
1183 if (n > 0 && n_fds == 0)
1184 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1185 else if (n == 0 && n_fds == 1)
1186 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1187 else if (n_fds > 0)
1188 log_warning("Got too many file descriptors via native socket. Ignoring.");
1189
1190 } else {
1191 assert(fd == s->audit_fd);
1192
1193 if (n > 0 && n_fds == 0)
1194 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1195 else if (n_fds > 0)
1196 log_warning("Got file descriptors via audit socket. Ignoring.");
1197 }
1198
1199 close_many(fds, n_fds);
1200 return 0;
1201 }
1202
1203 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1204 Server *s = userdata;
1205 int r;
1206
1207 assert(s);
1208
1209 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1210
1211 server_flush_to_var(s);
1212 server_sync(s);
1213 server_vacuum(s, false, false);
1214
1215 r = touch("/run/systemd/journal/flushed");
1216 if (r < 0)
1217 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1218
1219 return 0;
1220 }
1221
1222 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1223 Server *s = userdata;
1224 int r;
1225
1226 assert(s);
1227
1228 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1229 server_rotate(s);
1230 server_vacuum(s, true, true);
1231
1232 /* Let clients know when the most recent rotation happened. */
1233 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1234 if (r < 0)
1235 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1236
1237 return 0;
1238 }
1239
1240 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1241 Server *s = userdata;
1242
1243 assert(s);
1244
1245 log_received_signal(LOG_INFO, si);
1246
1247 sd_event_exit(s->event, 0);
1248 return 0;
1249 }
1250
1251 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1252 Server *s = userdata;
1253 int r;
1254
1255 assert(s);
1256
1257 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1258
1259 server_sync(s);
1260
1261 /* Let clients know when the most recent sync happened. */
1262 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1263 if (r < 0)
1264 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1265
1266 return 0;
1267 }
1268
1269 static int setup_signals(Server *s) {
1270 int r;
1271
1272 assert(s);
1273
1274 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1275
1276 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1277 if (r < 0)
1278 return r;
1279
1280 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1281 if (r < 0)
1282 return r;
1283
1284 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1285 if (r < 0)
1286 return r;
1287
1288 /* Let's process SIGTERM late, so that we flush all queued
1289 * messages to disk before we exit */
1290 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1291 if (r < 0)
1292 return r;
1293
1294 /* When journald is invoked on the terminal (when debugging),
1295 * it's useful if C-c is handled equivalent to SIGTERM. */
1296 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1297 if (r < 0)
1298 return r;
1299
1300 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1301 if (r < 0)
1302 return r;
1303
1304 /* SIGRTMIN+1 causes an immediate sync. We process this very
1305 * late, so that everything else queued at this point is
1306 * really written to disk. Clients can watch
1307 * /run/systemd/journal/synced with inotify until its mtime
1308 * changes to see when a sync happened. */
1309 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1310 if (r < 0)
1311 return r;
1312
1313 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1314 if (r < 0)
1315 return r;
1316
1317 return 0;
1318 }
1319
1320 static int server_parse_proc_cmdline(Server *s) {
1321 _cleanup_free_ char *line = NULL;
1322 const char *p;
1323 int r;
1324
1325 r = proc_cmdline(&line);
1326 if (r < 0) {
1327 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1328 return 0;
1329 }
1330
1331 p = line;
1332 for(;;) {
1333 _cleanup_free_ char *word = NULL;
1334
1335 r = extract_first_word(&p, &word, NULL, 0);
1336 if (r < 0)
1337 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1338
1339 if (r == 0)
1340 break;
1341
1342 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1343 r = parse_boolean(word + 35);
1344 if (r < 0)
1345 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1346 else
1347 s->forward_to_syslog = r;
1348 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1349 r = parse_boolean(word + 33);
1350 if (r < 0)
1351 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1352 else
1353 s->forward_to_kmsg = r;
1354 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1355 r = parse_boolean(word + 36);
1356 if (r < 0)
1357 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1358 else
1359 s->forward_to_console = r;
1360 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1361 r = parse_boolean(word + 33);
1362 if (r < 0)
1363 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1364 else
1365 s->forward_to_wall = r;
1366 } else if (startswith(word, "systemd.journald"))
1367 log_warning("Invalid systemd.journald parameter. Ignoring.");
1368 }
1369
1370 /* do not warn about state here, since probably systemd already did */
1371 return 0;
1372 }
1373
1374 static int server_parse_config_file(Server *s) {
1375 assert(s);
1376
1377 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1378 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1379 "Journal\0",
1380 config_item_perf_lookup, journald_gperf_lookup,
1381 false, s);
1382 }
1383
1384 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1385 Server *s = userdata;
1386
1387 assert(s);
1388
1389 server_sync(s);
1390 return 0;
1391 }
1392
1393 int server_schedule_sync(Server *s, int priority) {
1394 int r;
1395
1396 assert(s);
1397
1398 if (priority <= LOG_CRIT) {
1399 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1400 server_sync(s);
1401 return 0;
1402 }
1403
1404 if (s->sync_scheduled)
1405 return 0;
1406
1407 if (s->sync_interval_usec > 0) {
1408 usec_t when;
1409
1410 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1411 if (r < 0)
1412 return r;
1413
1414 when += s->sync_interval_usec;
1415
1416 if (!s->sync_event_source) {
1417 r = sd_event_add_time(
1418 s->event,
1419 &s->sync_event_source,
1420 CLOCK_MONOTONIC,
1421 when, 0,
1422 server_dispatch_sync, s);
1423 if (r < 0)
1424 return r;
1425
1426 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1427 } else {
1428 r = sd_event_source_set_time(s->sync_event_source, when);
1429 if (r < 0)
1430 return r;
1431
1432 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1433 }
1434 if (r < 0)
1435 return r;
1436
1437 s->sync_scheduled = true;
1438 }
1439
1440 return 0;
1441 }
1442
1443 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1444 Server *s = userdata;
1445
1446 assert(s);
1447
1448 server_cache_hostname(s);
1449 return 0;
1450 }
1451
1452 static int server_open_hostname(Server *s) {
1453 int r;
1454
1455 assert(s);
1456
1457 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1458 if (s->hostname_fd < 0)
1459 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1460
1461 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1462 if (r < 0) {
1463 /* kernels prior to 3.2 don't support polling this file. Ignore
1464 * the failure. */
1465 if (r == -EPERM) {
1466 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1467 s->hostname_fd = safe_close(s->hostname_fd);
1468 return 0;
1469 }
1470
1471 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1472 }
1473
1474 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1475 if (r < 0)
1476 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1477
1478 return 0;
1479 }
1480
1481 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1482 Server *s = userdata;
1483 int r;
1484
1485 assert(s);
1486 assert(s->notify_event_source == es);
1487 assert(s->notify_fd == fd);
1488
1489 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1490 * message on it. Either it's the wtachdog event, the initial
1491 * READY=1 event or an stdout stream event. If there's nothing
1492 * to write anymore, turn our event source off. The next time
1493 * there's something to send it will be turned on again. */
1494
1495 if (!s->sent_notify_ready) {
1496 static const char p[] =
1497 "READY=1\n"
1498 "STATUS=Processing requests...";
1499 ssize_t l;
1500
1501 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1502 if (l < 0) {
1503 if (errno == EAGAIN)
1504 return 0;
1505
1506 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1507 }
1508
1509 s->sent_notify_ready = true;
1510 log_debug("Sent READY=1 notification.");
1511
1512 } else if (s->send_watchdog) {
1513
1514 static const char p[] =
1515 "WATCHDOG=1";
1516
1517 ssize_t l;
1518
1519 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1520 if (l < 0) {
1521 if (errno == EAGAIN)
1522 return 0;
1523
1524 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1525 }
1526
1527 s->send_watchdog = false;
1528 log_debug("Sent WATCHDOG=1 notification.");
1529
1530 } else if (s->stdout_streams_notify_queue)
1531 /* Dispatch one stream notification event */
1532 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1533
1534 /* Leave us enabled if there's still more to to do. */
1535 if (s->send_watchdog || s->stdout_streams_notify_queue)
1536 return 0;
1537
1538 /* There was nothing to do anymore, let's turn ourselves off. */
1539 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1540 if (r < 0)
1541 return log_error_errno(r, "Failed to turn off notify event source: %m");
1542
1543 return 0;
1544 }
1545
1546 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1547 Server *s = userdata;
1548 int r;
1549
1550 assert(s);
1551
1552 s->send_watchdog = true;
1553
1554 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1555 if (r < 0)
1556 log_warning_errno(r, "Failed to turn on notify event source: %m");
1557
1558 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1559 if (r < 0)
1560 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1561
1562 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1563 if (r < 0)
1564 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1565
1566 return 0;
1567 }
1568
1569 static int server_connect_notify(Server *s) {
1570 union sockaddr_union sa = {
1571 .un.sun_family = AF_UNIX,
1572 };
1573 const char *e;
1574 int r;
1575
1576 assert(s);
1577 assert(s->notify_fd < 0);
1578 assert(!s->notify_event_source);
1579
1580 /*
1581 So here's the problem: we'd like to send notification
1582 messages to PID 1, but we cannot do that via sd_notify(),
1583 since that's synchronous, and we might end up blocking on
1584 it. Specifically: given that PID 1 might block on
1585 dbus-daemon during IPC, and dbus-daemon is logging to us,
1586 and might hence block on us, we might end up in a deadlock
1587 if we block on sending PID 1 notification messages -- by
1588 generating a full blocking circle. To avoid this, let's
1589 create a non-blocking socket, and connect it to the
1590 notification socket, and then wait for POLLOUT before we
1591 send anything. This should efficiently avoid any deadlocks,
1592 as we'll never block on PID 1, hence PID 1 can safely block
1593 on dbus-daemon which can safely block on us again.
1594
1595 Don't think that this issue is real? It is, see:
1596 https://github.com/systemd/systemd/issues/1505
1597 */
1598
1599 e = getenv("NOTIFY_SOCKET");
1600 if (!e)
1601 return 0;
1602
1603 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1604 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1605 return -EINVAL;
1606 }
1607
1608 if (strlen(e) > sizeof(sa.un.sun_path)) {
1609 log_error("NOTIFY_SOCKET path too long: %s", e);
1610 return -EINVAL;
1611 }
1612
1613 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1614 if (s->notify_fd < 0)
1615 return log_error_errno(errno, "Failed to create notify socket: %m");
1616
1617 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1618
1619 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1620 if (sa.un.sun_path[0] == '@')
1621 sa.un.sun_path[0] = 0;
1622
1623 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1624 if (r < 0)
1625 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1626
1627 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1628 if (r < 0)
1629 return log_error_errno(r, "Failed to watch notification socket: %m");
1630
1631 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1632 s->send_watchdog = true;
1633
1634 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1635 if (r < 0)
1636 return log_error_errno(r, "Failed to add watchdog time event: %m");
1637 }
1638
1639 /* This should fire pretty soon, which we'll use to send the
1640 * READY=1 event. */
1641
1642 return 0;
1643 }
1644
1645 int server_init(Server *s) {
1646 _cleanup_fdset_free_ FDSet *fds = NULL;
1647 int n, r, fd;
1648 bool no_sockets;
1649
1650 assert(s);
1651
1652 zero(*s);
1653 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1654 s->compress = true;
1655 s->seal = true;
1656
1657 s->watchdog_usec = USEC_INFINITY;
1658
1659 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1660 s->sync_scheduled = false;
1661
1662 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1663 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1664
1665 s->forward_to_wall = true;
1666
1667 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1668
1669 s->max_level_store = LOG_DEBUG;
1670 s->max_level_syslog = LOG_DEBUG;
1671 s->max_level_kmsg = LOG_NOTICE;
1672 s->max_level_console = LOG_INFO;
1673 s->max_level_wall = LOG_EMERG;
1674
1675 journal_reset_metrics(&s->system_metrics);
1676 journal_reset_metrics(&s->runtime_metrics);
1677
1678 server_parse_config_file(s);
1679 server_parse_proc_cmdline(s);
1680
1681 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1682 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1683 s->rate_limit_interval, s->rate_limit_burst);
1684 s->rate_limit_interval = s->rate_limit_burst = 0;
1685 }
1686
1687 (void) mkdir_p("/run/systemd/journal", 0755);
1688
1689 s->user_journals = ordered_hashmap_new(NULL);
1690 if (!s->user_journals)
1691 return log_oom();
1692
1693 s->mmap = mmap_cache_new();
1694 if (!s->mmap)
1695 return log_oom();
1696
1697 r = sd_event_default(&s->event);
1698 if (r < 0)
1699 return log_error_errno(r, "Failed to create event loop: %m");
1700
1701 n = sd_listen_fds(true);
1702 if (n < 0)
1703 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1704
1705 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1706
1707 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1708
1709 if (s->native_fd >= 0) {
1710 log_error("Too many native sockets passed.");
1711 return -EINVAL;
1712 }
1713
1714 s->native_fd = fd;
1715
1716 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1717
1718 if (s->stdout_fd >= 0) {
1719 log_error("Too many stdout sockets passed.");
1720 return -EINVAL;
1721 }
1722
1723 s->stdout_fd = fd;
1724
1725 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1726 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1727
1728 if (s->syslog_fd >= 0) {
1729 log_error("Too many /dev/log sockets passed.");
1730 return -EINVAL;
1731 }
1732
1733 s->syslog_fd = fd;
1734
1735 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1736
1737 if (s->audit_fd >= 0) {
1738 log_error("Too many audit sockets passed.");
1739 return -EINVAL;
1740 }
1741
1742 s->audit_fd = fd;
1743
1744 } else {
1745
1746 if (!fds) {
1747 fds = fdset_new();
1748 if (!fds)
1749 return log_oom();
1750 }
1751
1752 r = fdset_put(fds, fd);
1753 if (r < 0)
1754 return log_oom();
1755 }
1756 }
1757
1758 /* Try to restore streams, but don't bother if this fails */
1759 (void) server_restore_streams(s, fds);
1760
1761 if (fdset_size(fds) > 0) {
1762 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1763 fds = fdset_free(fds);
1764 }
1765
1766 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1767
1768 /* always open stdout, syslog, native, and kmsg sockets */
1769
1770 /* systemd-journald.socket: /run/systemd/journal/stdout */
1771 r = server_open_stdout_socket(s);
1772 if (r < 0)
1773 return r;
1774
1775 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1776 r = server_open_syslog_socket(s);
1777 if (r < 0)
1778 return r;
1779
1780 /* systemd-journald.socket: /run/systemd/journal/socket */
1781 r = server_open_native_socket(s);
1782 if (r < 0)
1783 return r;
1784
1785 /* /dev/ksmg */
1786 r = server_open_dev_kmsg(s);
1787 if (r < 0)
1788 return r;
1789
1790 /* Unless we got *some* sockets and not audit, open audit socket */
1791 if (s->audit_fd >= 0 || no_sockets) {
1792 r = server_open_audit(s);
1793 if (r < 0)
1794 return r;
1795 }
1796
1797 r = server_open_kernel_seqnum(s);
1798 if (r < 0)
1799 return r;
1800
1801 r = server_open_hostname(s);
1802 if (r < 0)
1803 return r;
1804
1805 r = setup_signals(s);
1806 if (r < 0)
1807 return r;
1808
1809 s->udev = udev_new();
1810 if (!s->udev)
1811 return -ENOMEM;
1812
1813 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1814 if (!s->rate_limit)
1815 return -ENOMEM;
1816
1817 r = cg_get_root_path(&s->cgroup_root);
1818 if (r < 0)
1819 return r;
1820
1821 server_cache_hostname(s);
1822 server_cache_boot_id(s);
1823 server_cache_machine_id(s);
1824
1825 (void) server_connect_notify(s);
1826
1827 return system_journal_open(s, false);
1828 }
1829
1830 void server_maybe_append_tags(Server *s) {
1831 #ifdef HAVE_GCRYPT
1832 JournalFile *f;
1833 Iterator i;
1834 usec_t n;
1835
1836 n = now(CLOCK_REALTIME);
1837
1838 if (s->system_journal)
1839 journal_file_maybe_append_tag(s->system_journal, n);
1840
1841 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1842 journal_file_maybe_append_tag(f, n);
1843 #endif
1844 }
1845
1846 void server_done(Server *s) {
1847 JournalFile *f;
1848 assert(s);
1849
1850 while (s->stdout_streams)
1851 stdout_stream_free(s->stdout_streams);
1852
1853 if (s->system_journal)
1854 journal_file_close(s->system_journal);
1855
1856 if (s->runtime_journal)
1857 journal_file_close(s->runtime_journal);
1858
1859 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1860 journal_file_close(f);
1861
1862 ordered_hashmap_free(s->user_journals);
1863
1864 sd_event_source_unref(s->syslog_event_source);
1865 sd_event_source_unref(s->native_event_source);
1866 sd_event_source_unref(s->stdout_event_source);
1867 sd_event_source_unref(s->dev_kmsg_event_source);
1868 sd_event_source_unref(s->audit_event_source);
1869 sd_event_source_unref(s->sync_event_source);
1870 sd_event_source_unref(s->sigusr1_event_source);
1871 sd_event_source_unref(s->sigusr2_event_source);
1872 sd_event_source_unref(s->sigterm_event_source);
1873 sd_event_source_unref(s->sigint_event_source);
1874 sd_event_source_unref(s->sigrtmin1_event_source);
1875 sd_event_source_unref(s->hostname_event_source);
1876 sd_event_source_unref(s->notify_event_source);
1877 sd_event_source_unref(s->watchdog_event_source);
1878 sd_event_unref(s->event);
1879
1880 safe_close(s->syslog_fd);
1881 safe_close(s->native_fd);
1882 safe_close(s->stdout_fd);
1883 safe_close(s->dev_kmsg_fd);
1884 safe_close(s->audit_fd);
1885 safe_close(s->hostname_fd);
1886 safe_close(s->notify_fd);
1887
1888 if (s->rate_limit)
1889 journal_rate_limit_free(s->rate_limit);
1890
1891 if (s->kernel_seqnum)
1892 munmap(s->kernel_seqnum, sizeof(uint64_t));
1893
1894 free(s->buffer);
1895 free(s->tty_path);
1896 free(s->cgroup_root);
1897 free(s->hostname_field);
1898
1899 if (s->mmap)
1900 mmap_cache_unref(s->mmap);
1901
1902 udev_unref(s->udev);
1903 }
1904
1905 static const char* const storage_table[_STORAGE_MAX] = {
1906 [STORAGE_AUTO] = "auto",
1907 [STORAGE_VOLATILE] = "volatile",
1908 [STORAGE_PERSISTENT] = "persistent",
1909 [STORAGE_NONE] = "none"
1910 };
1911
1912 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1913 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1914
1915 static const char* const split_mode_table[_SPLIT_MAX] = {
1916 [SPLIT_LOGIN] = "login",
1917 [SPLIT_UID] = "uid",
1918 [SPLIT_NONE] = "none",
1919 };
1920
1921 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1922 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");