]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
Merge pull request #2318 from vcaputo/coalesce-ftruncates-redux
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "fileio.h"
45 #include "formats-util.h"
46 #include "fs-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "io-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72 #include "user-util.h"
73
74 #define USER_JOURNALS_MAX 1024
75
76 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
77 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78 #define DEFAULT_RATE_LIMIT_BURST 1000
79 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
80
81 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
82
83 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
84
85 /* The period to insert between posting changes for coalescing */
86 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
87
88 static int determine_space_for(
89 Server *s,
90 JournalMetrics *metrics,
91 const char *path,
92 const char *name,
93 bool verbose,
94 bool patch_min_use,
95 uint64_t *available,
96 uint64_t *limit) {
97
98 uint64_t sum = 0, ss_avail, avail;
99 _cleanup_closedir_ DIR *d = NULL;
100 struct dirent *de;
101 struct statvfs ss;
102 const char *p;
103 usec_t ts;
104
105 assert(s);
106 assert(metrics);
107 assert(path);
108 assert(name);
109
110 ts = now(CLOCK_MONOTONIC);
111
112 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
113
114 if (available)
115 *available = s->cached_space_available;
116 if (limit)
117 *limit = s->cached_space_limit;
118
119 return 0;
120 }
121
122 p = strjoina(path, SERVER_MACHINE_ID(s));
123 d = opendir(p);
124 if (!d)
125 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
126
127 if (fstatvfs(dirfd(d), &ss) < 0)
128 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
129
130 FOREACH_DIRENT_ALL(de, d, break) {
131 struct stat st;
132
133 if (!endswith(de->d_name, ".journal") &&
134 !endswith(de->d_name, ".journal~"))
135 continue;
136
137 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
138 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
139 continue;
140 }
141
142 if (!S_ISREG(st.st_mode))
143 continue;
144
145 sum += (uint64_t) st.st_blocks * 512UL;
146 }
147
148 /* If request, then let's bump the min_use limit to the
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
154
155 if (patch_min_use)
156 metrics->min_use = MAX(metrics->min_use, sum);
157
158 ss_avail = ss.f_bsize * ss.f_bavail;
159 avail = LESS_BY(ss_avail, metrics->keep_free);
160
161 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
162 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
163 s->cached_space_timestamp = ts;
164
165 if (verbose) {
166 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
167 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
168
169 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
170 "%s (%s) is currently using %s.\n"
171 "Maximum allowed usage is set to %s.\n"
172 "Leaving at least %s free (of currently available %s of space).\n"
173 "Enforced usage limit is thus %s, of which %s are still available.",
174 name, path,
175 format_bytes(fb1, sizeof(fb1), sum),
176 format_bytes(fb2, sizeof(fb2), metrics->max_use),
177 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
178 format_bytes(fb4, sizeof(fb4), ss_avail),
179 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
180 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
181 }
182
183 if (available)
184 *available = s->cached_space_available;
185 if (limit)
186 *limit = s->cached_space_limit;
187
188 return 1;
189 }
190
191 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
192 JournalMetrics *metrics;
193 const char *path, *name;
194
195 assert(s);
196
197 if (s->system_journal) {
198 path = "/var/log/journal/";
199 metrics = &s->system_metrics;
200 name = "System journal";
201 } else {
202 path = "/run/log/journal/";
203 metrics = &s->runtime_metrics;
204 name = "Runtime journal";
205 }
206
207 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
208 }
209
210 static void server_add_acls(JournalFile *f, uid_t uid) {
211 #ifdef HAVE_ACL
212 int r;
213 #endif
214 assert(f);
215
216 #ifdef HAVE_ACL
217 if (uid <= SYSTEM_UID_MAX)
218 return;
219
220 r = add_acls_for_user(f->fd, uid);
221 if (r < 0)
222 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
223 #endif
224 }
225
226 static int open_journal(
227 Server *s,
228 bool reliably,
229 const char *fname,
230 int flags,
231 bool seal,
232 JournalMetrics *metrics,
233 JournalFile *template,
234 JournalFile **ret) {
235 int r;
236
237 assert(s);
238 assert(fname);
239 assert(ret);
240
241 if (reliably)
242 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret);
243 else
244 r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret);
245
246 if (r < 0)
247 return r;
248
249 r = journal_file_enable_post_change_timer(*ret, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
250 if (r < 0) {
251 *ret = journal_file_close(*ret);
252 return r;
253 }
254
255 return r;
256 }
257
258 static JournalFile* find_journal(Server *s, uid_t uid) {
259 _cleanup_free_ char *p = NULL;
260 int r;
261 JournalFile *f;
262 sd_id128_t machine;
263
264 assert(s);
265
266 /* We split up user logs only on /var, not on /run. If the
267 * runtime file is open, we write to it exclusively, in order
268 * to guarantee proper order as soon as we flush /run to
269 * /var and close the runtime file. */
270
271 if (s->runtime_journal)
272 return s->runtime_journal;
273
274 if (uid <= SYSTEM_UID_MAX)
275 return s->system_journal;
276
277 r = sd_id128_get_machine(&machine);
278 if (r < 0)
279 return s->system_journal;
280
281 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
282 if (f)
283 return f;
284
285 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
286 SD_ID128_FORMAT_VAL(machine), uid) < 0)
287 return s->system_journal;
288
289 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
290 /* Too many open? Then let's close one */
291 f = ordered_hashmap_steal_first(s->user_journals);
292 assert(f);
293 journal_file_close(f);
294 }
295
296 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &f);
297 if (r < 0)
298 return s->system_journal;
299
300 server_add_acls(f, uid);
301
302 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
303 if (r < 0) {
304 journal_file_close(f);
305 return s->system_journal;
306 }
307
308 return f;
309 }
310
311 static int do_rotate(
312 Server *s,
313 JournalFile **f,
314 const char* name,
315 bool seal,
316 uint32_t uid) {
317
318 int r;
319 assert(s);
320
321 if (!*f)
322 return -EINVAL;
323
324 r = journal_file_rotate(f, s->compress, seal);
325 if (r < 0)
326 if (*f)
327 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
328 else
329 log_error_errno(r, "Failed to create new %s journal: %m", name);
330 else
331 server_add_acls(*f, uid);
332
333 return r;
334 }
335
336 void server_rotate(Server *s) {
337 JournalFile *f;
338 void *k;
339 Iterator i;
340 int r;
341
342 log_debug("Rotating...");
343
344 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
345 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
346
347 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
348 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
349 if (r >= 0)
350 ordered_hashmap_replace(s->user_journals, k, f);
351 else if (!f)
352 /* Old file has been closed and deallocated */
353 ordered_hashmap_remove(s->user_journals, k);
354 }
355 }
356
357 void server_sync(Server *s) {
358 JournalFile *f;
359 Iterator i;
360 int r;
361
362 if (s->system_journal) {
363 r = journal_file_set_offline(s->system_journal);
364 if (r < 0)
365 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
366 }
367
368 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
369 r = journal_file_set_offline(f);
370 if (r < 0)
371 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
372 }
373
374 if (s->sync_event_source) {
375 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
376 if (r < 0)
377 log_error_errno(r, "Failed to disable sync timer source: %m");
378 }
379
380 s->sync_scheduled = false;
381 }
382
383 static void do_vacuum(
384 Server *s,
385 JournalFile *f,
386 JournalMetrics *metrics,
387 const char *path,
388 const char *name,
389 bool verbose,
390 bool patch_min_use) {
391
392 const char *p;
393 uint64_t limit;
394 int r;
395
396 assert(s);
397 assert(metrics);
398 assert(path);
399 assert(name);
400
401 if (!f)
402 return;
403
404 p = strjoina(path, SERVER_MACHINE_ID(s));
405
406 limit = metrics->max_use;
407 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
408
409 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
410 if (r < 0 && r != -ENOENT)
411 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
412 }
413
414 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
415 assert(s);
416
417 log_debug("Vacuuming...");
418
419 s->oldest_file_usec = 0;
420
421 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
422 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
423
424 s->cached_space_limit = 0;
425 s->cached_space_available = 0;
426 s->cached_space_timestamp = 0;
427
428 return 0;
429 }
430
431 static void server_cache_machine_id(Server *s) {
432 sd_id128_t id;
433 int r;
434
435 assert(s);
436
437 r = sd_id128_get_machine(&id);
438 if (r < 0)
439 return;
440
441 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
442 }
443
444 static void server_cache_boot_id(Server *s) {
445 sd_id128_t id;
446 int r;
447
448 assert(s);
449
450 r = sd_id128_get_boot(&id);
451 if (r < 0)
452 return;
453
454 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
455 }
456
457 static void server_cache_hostname(Server *s) {
458 _cleanup_free_ char *t = NULL;
459 char *x;
460
461 assert(s);
462
463 t = gethostname_malloc();
464 if (!t)
465 return;
466
467 x = strappend("_HOSTNAME=", t);
468 if (!x)
469 return;
470
471 free(s->hostname_field);
472 s->hostname_field = x;
473 }
474
475 static bool shall_try_append_again(JournalFile *f, int r) {
476
477 /* -E2BIG Hit configured limit
478 -EFBIG Hit fs limit
479 -EDQUOT Quota limit hit
480 -ENOSPC Disk full
481 -EIO I/O error of some kind (mmap)
482 -EHOSTDOWN Other machine
483 -EBUSY Unclean shutdown
484 -EPROTONOSUPPORT Unsupported feature
485 -EBADMSG Corrupted
486 -ENODATA Truncated
487 -ESHUTDOWN Already archived
488 -EIDRM Journal file has been deleted */
489
490 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
491 log_debug("%s: Allocation limit reached, rotating.", f->path);
492 else if (r == -EHOSTDOWN)
493 log_info("%s: Journal file from other machine, rotating.", f->path);
494 else if (r == -EBUSY)
495 log_info("%s: Unclean shutdown, rotating.", f->path);
496 else if (r == -EPROTONOSUPPORT)
497 log_info("%s: Unsupported feature, rotating.", f->path);
498 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
499 log_warning("%s: Journal file corrupted, rotating.", f->path);
500 else if (r == -EIO)
501 log_warning("%s: IO error, rotating.", f->path);
502 else if (r == -EIDRM)
503 log_warning("%s: Journal file has been deleted, rotating.", f->path);
504 else
505 return false;
506
507 return true;
508 }
509
510 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
511 JournalFile *f;
512 bool vacuumed = false;
513 int r;
514
515 assert(s);
516 assert(iovec);
517 assert(n > 0);
518
519 f = find_journal(s, uid);
520 if (!f)
521 return;
522
523 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
524 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
525 server_rotate(s);
526 server_vacuum(s, false, false);
527 vacuumed = true;
528
529 f = find_journal(s, uid);
530 if (!f)
531 return;
532 }
533
534 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
535 if (r >= 0) {
536 server_schedule_sync(s, priority);
537 return;
538 }
539
540 if (vacuumed || !shall_try_append_again(f, r)) {
541 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
542 return;
543 }
544
545 server_rotate(s);
546 server_vacuum(s, false, false);
547
548 f = find_journal(s, uid);
549 if (!f)
550 return;
551
552 log_debug("Retrying write.");
553 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
554 if (r < 0)
555 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
556 else
557 server_schedule_sync(s, priority);
558 }
559
560 static void dispatch_message_real(
561 Server *s,
562 struct iovec *iovec, unsigned n, unsigned m,
563 const struct ucred *ucred,
564 const struct timeval *tv,
565 const char *label, size_t label_len,
566 const char *unit_id,
567 int priority,
568 pid_t object_pid) {
569
570 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
571 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
572 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
573 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
574 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
575 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
576 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
577 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
578 uid_t object_uid;
579 gid_t object_gid;
580 char *x;
581 int r;
582 char *t, *c;
583 uid_t realuid = 0, owner = 0, journal_uid;
584 bool owner_valid = false;
585 #ifdef HAVE_AUDIT
586 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
587 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
588 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
589 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
590
591 uint32_t audit;
592 uid_t loginuid;
593 #endif
594
595 assert(s);
596 assert(iovec);
597 assert(n > 0);
598 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
599
600 if (ucred) {
601 realuid = ucred->uid;
602
603 sprintf(pid, "_PID="PID_FMT, ucred->pid);
604 IOVEC_SET_STRING(iovec[n++], pid);
605
606 sprintf(uid, "_UID="UID_FMT, ucred->uid);
607 IOVEC_SET_STRING(iovec[n++], uid);
608
609 sprintf(gid, "_GID="GID_FMT, ucred->gid);
610 IOVEC_SET_STRING(iovec[n++], gid);
611
612 r = get_process_comm(ucred->pid, &t);
613 if (r >= 0) {
614 x = strjoina("_COMM=", t);
615 free(t);
616 IOVEC_SET_STRING(iovec[n++], x);
617 }
618
619 r = get_process_exe(ucred->pid, &t);
620 if (r >= 0) {
621 x = strjoina("_EXE=", t);
622 free(t);
623 IOVEC_SET_STRING(iovec[n++], x);
624 }
625
626 r = get_process_cmdline(ucred->pid, 0, false, &t);
627 if (r >= 0) {
628 x = strjoina("_CMDLINE=", t);
629 free(t);
630 IOVEC_SET_STRING(iovec[n++], x);
631 }
632
633 r = get_process_capeff(ucred->pid, &t);
634 if (r >= 0) {
635 x = strjoina("_CAP_EFFECTIVE=", t);
636 free(t);
637 IOVEC_SET_STRING(iovec[n++], x);
638 }
639
640 #ifdef HAVE_AUDIT
641 r = audit_session_from_pid(ucred->pid, &audit);
642 if (r >= 0) {
643 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
644 IOVEC_SET_STRING(iovec[n++], audit_session);
645 }
646
647 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
648 if (r >= 0) {
649 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
650 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
651 }
652 #endif
653
654 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
655 if (r >= 0) {
656 char *session = NULL;
657
658 x = strjoina("_SYSTEMD_CGROUP=", c);
659 IOVEC_SET_STRING(iovec[n++], x);
660
661 r = cg_path_get_session(c, &t);
662 if (r >= 0) {
663 session = strjoina("_SYSTEMD_SESSION=", t);
664 free(t);
665 IOVEC_SET_STRING(iovec[n++], session);
666 }
667
668 if (cg_path_get_owner_uid(c, &owner) >= 0) {
669 owner_valid = true;
670
671 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
672 IOVEC_SET_STRING(iovec[n++], owner_uid);
673 }
674
675 if (cg_path_get_unit(c, &t) >= 0) {
676 x = strjoina("_SYSTEMD_UNIT=", t);
677 free(t);
678 IOVEC_SET_STRING(iovec[n++], x);
679 } else if (unit_id && !session) {
680 x = strjoina("_SYSTEMD_UNIT=", unit_id);
681 IOVEC_SET_STRING(iovec[n++], x);
682 }
683
684 if (cg_path_get_user_unit(c, &t) >= 0) {
685 x = strjoina("_SYSTEMD_USER_UNIT=", t);
686 free(t);
687 IOVEC_SET_STRING(iovec[n++], x);
688 } else if (unit_id && session) {
689 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
690 IOVEC_SET_STRING(iovec[n++], x);
691 }
692
693 if (cg_path_get_slice(c, &t) >= 0) {
694 x = strjoina("_SYSTEMD_SLICE=", t);
695 free(t);
696 IOVEC_SET_STRING(iovec[n++], x);
697 }
698
699 free(c);
700 } else if (unit_id) {
701 x = strjoina("_SYSTEMD_UNIT=", unit_id);
702 IOVEC_SET_STRING(iovec[n++], x);
703 }
704
705 #ifdef HAVE_SELINUX
706 if (mac_selinux_have()) {
707 if (label) {
708 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
709
710 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
711 IOVEC_SET_STRING(iovec[n++], x);
712 } else {
713 security_context_t con;
714
715 if (getpidcon(ucred->pid, &con) >= 0) {
716 x = strjoina("_SELINUX_CONTEXT=", con);
717
718 freecon(con);
719 IOVEC_SET_STRING(iovec[n++], x);
720 }
721 }
722 }
723 #endif
724 }
725 assert(n <= m);
726
727 if (object_pid) {
728 r = get_process_uid(object_pid, &object_uid);
729 if (r >= 0) {
730 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
731 IOVEC_SET_STRING(iovec[n++], o_uid);
732 }
733
734 r = get_process_gid(object_pid, &object_gid);
735 if (r >= 0) {
736 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
737 IOVEC_SET_STRING(iovec[n++], o_gid);
738 }
739
740 r = get_process_comm(object_pid, &t);
741 if (r >= 0) {
742 x = strjoina("OBJECT_COMM=", t);
743 free(t);
744 IOVEC_SET_STRING(iovec[n++], x);
745 }
746
747 r = get_process_exe(object_pid, &t);
748 if (r >= 0) {
749 x = strjoina("OBJECT_EXE=", t);
750 free(t);
751 IOVEC_SET_STRING(iovec[n++], x);
752 }
753
754 r = get_process_cmdline(object_pid, 0, false, &t);
755 if (r >= 0) {
756 x = strjoina("OBJECT_CMDLINE=", t);
757 free(t);
758 IOVEC_SET_STRING(iovec[n++], x);
759 }
760
761 #ifdef HAVE_AUDIT
762 r = audit_session_from_pid(object_pid, &audit);
763 if (r >= 0) {
764 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
765 IOVEC_SET_STRING(iovec[n++], o_audit_session);
766 }
767
768 r = audit_loginuid_from_pid(object_pid, &loginuid);
769 if (r >= 0) {
770 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
771 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
772 }
773 #endif
774
775 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
776 if (r >= 0) {
777 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
778 IOVEC_SET_STRING(iovec[n++], x);
779
780 r = cg_path_get_session(c, &t);
781 if (r >= 0) {
782 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
783 free(t);
784 IOVEC_SET_STRING(iovec[n++], x);
785 }
786
787 if (cg_path_get_owner_uid(c, &owner) >= 0) {
788 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
789 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
790 }
791
792 if (cg_path_get_unit(c, &t) >= 0) {
793 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
794 free(t);
795 IOVEC_SET_STRING(iovec[n++], x);
796 }
797
798 if (cg_path_get_user_unit(c, &t) >= 0) {
799 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
800 free(t);
801 IOVEC_SET_STRING(iovec[n++], x);
802 }
803
804 free(c);
805 }
806 }
807 assert(n <= m);
808
809 if (tv) {
810 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
811 IOVEC_SET_STRING(iovec[n++], source_time);
812 }
813
814 /* Note that strictly speaking storing the boot id here is
815 * redundant since the entry includes this in-line
816 * anyway. However, we need this indexed, too. */
817 if (!isempty(s->boot_id_field))
818 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
819
820 if (!isempty(s->machine_id_field))
821 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
822
823 if (!isempty(s->hostname_field))
824 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
825
826 assert(n <= m);
827
828 if (s->split_mode == SPLIT_UID && realuid > 0)
829 /* Split up strictly by any UID */
830 journal_uid = realuid;
831 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
832 /* Split up by login UIDs. We do this only if the
833 * realuid is not root, in order not to accidentally
834 * leak privileged information to the user that is
835 * logged by a privileged process that is part of an
836 * unprivileged session. */
837 journal_uid = owner;
838 else
839 journal_uid = 0;
840
841 write_to_journal(s, journal_uid, iovec, n, priority);
842 }
843
844 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
845 char mid[11 + 32 + 1];
846 char buffer[16 + LINE_MAX + 1];
847 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
848 int n = 0;
849 va_list ap;
850 struct ucred ucred = {};
851
852 assert(s);
853 assert(format);
854
855 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
856 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
857
858 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
859 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
860
861 memcpy(buffer, "MESSAGE=", 8);
862 va_start(ap, format);
863 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
864 va_end(ap);
865 IOVEC_SET_STRING(iovec[n++], buffer);
866
867 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
868 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
869 IOVEC_SET_STRING(iovec[n++], mid);
870 }
871
872 ucred.pid = getpid();
873 ucred.uid = getuid();
874 ucred.gid = getgid();
875
876 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
877 }
878
879 void server_dispatch_message(
880 Server *s,
881 struct iovec *iovec, unsigned n, unsigned m,
882 const struct ucred *ucred,
883 const struct timeval *tv,
884 const char *label, size_t label_len,
885 const char *unit_id,
886 int priority,
887 pid_t object_pid) {
888
889 int rl, r;
890 _cleanup_free_ char *path = NULL;
891 uint64_t available = 0;
892 char *c;
893
894 assert(s);
895 assert(iovec || n == 0);
896
897 if (n == 0)
898 return;
899
900 if (LOG_PRI(priority) > s->max_level_store)
901 return;
902
903 /* Stop early in case the information will not be stored
904 * in a journal. */
905 if (s->storage == STORAGE_NONE)
906 return;
907
908 if (!ucred)
909 goto finish;
910
911 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
912 if (r < 0)
913 goto finish;
914
915 /* example: /user/lennart/3/foobar
916 * /system/dbus.service/foobar
917 *
918 * So let's cut of everything past the third /, since that is
919 * where user directories start */
920
921 c = strchr(path, '/');
922 if (c) {
923 c = strchr(c+1, '/');
924 if (c) {
925 c = strchr(c+1, '/');
926 if (c)
927 *c = 0;
928 }
929 }
930
931 (void) determine_space(s, false, false, &available, NULL);
932 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
933 if (rl == 0)
934 return;
935
936 /* Write a suppression message if we suppressed something */
937 if (rl > 1)
938 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
939 "Suppressed %u messages from %s", rl - 1, path);
940
941 finish:
942 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
943 }
944
945
946 static int system_journal_open(Server *s, bool flush_requested) {
947 const char *fn;
948 int r = 0;
949
950 if (!s->system_journal &&
951 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
952 (flush_requested
953 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
954
955 /* If in auto mode: first try to create the machine
956 * path, but not the prefix.
957 *
958 * If in persistent mode: create /var/log/journal and
959 * the machine path */
960
961 if (s->storage == STORAGE_PERSISTENT)
962 (void) mkdir_p("/var/log/journal/", 0755);
963
964 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
965 (void) mkdir(fn, 0755);
966
967 fn = strjoina(fn, "/system.journal");
968 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &s->system_journal);
969 if (r >= 0) {
970 server_add_acls(s->system_journal, 0);
971 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
972 } else if (r < 0) {
973 if (r != -ENOENT && r != -EROFS)
974 log_warning_errno(r, "Failed to open system journal: %m");
975
976 r = 0;
977 }
978 }
979
980 if (!s->runtime_journal &&
981 (s->storage != STORAGE_NONE)) {
982
983 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
984
985 if (s->system_journal) {
986
987 /* Try to open the runtime journal, but only
988 * if it already exists, so that we can flush
989 * it into the system journal */
990
991 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, NULL, &s->runtime_journal);
992 if (r < 0) {
993 if (r != -ENOENT)
994 log_warning_errno(r, "Failed to open runtime journal: %m");
995
996 r = 0;
997 }
998
999 } else {
1000
1001 /* OK, we really need the runtime journal, so create
1002 * it if necessary. */
1003
1004 (void) mkdir("/run/log", 0755);
1005 (void) mkdir("/run/log/journal", 0755);
1006 (void) mkdir_parents(fn, 0750);
1007
1008 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, NULL, &s->runtime_journal);
1009 if (r < 0)
1010 return log_error_errno(r, "Failed to open runtime journal: %m");
1011 }
1012
1013 if (s->runtime_journal) {
1014 server_add_acls(s->runtime_journal, 0);
1015 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1016 }
1017 }
1018
1019 return r;
1020 }
1021
1022 int server_flush_to_var(Server *s) {
1023 sd_id128_t machine;
1024 sd_journal *j = NULL;
1025 char ts[FORMAT_TIMESPAN_MAX];
1026 usec_t start;
1027 unsigned n = 0;
1028 int r;
1029
1030 assert(s);
1031
1032 if (s->storage != STORAGE_AUTO &&
1033 s->storage != STORAGE_PERSISTENT)
1034 return 0;
1035
1036 if (!s->runtime_journal)
1037 return 0;
1038
1039 (void) system_journal_open(s, true);
1040
1041 if (!s->system_journal)
1042 return 0;
1043
1044 log_debug("Flushing to /var...");
1045
1046 start = now(CLOCK_MONOTONIC);
1047
1048 r = sd_id128_get_machine(&machine);
1049 if (r < 0)
1050 return r;
1051
1052 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1053 if (r < 0)
1054 return log_error_errno(r, "Failed to read runtime journal: %m");
1055
1056 sd_journal_set_data_threshold(j, 0);
1057
1058 SD_JOURNAL_FOREACH(j) {
1059 Object *o = NULL;
1060 JournalFile *f;
1061
1062 f = j->current_file;
1063 assert(f && f->current_offset > 0);
1064
1065 n++;
1066
1067 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1068 if (r < 0) {
1069 log_error_errno(r, "Can't read entry: %m");
1070 goto finish;
1071 }
1072
1073 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1074 if (r >= 0)
1075 continue;
1076
1077 if (!shall_try_append_again(s->system_journal, r)) {
1078 log_error_errno(r, "Can't write entry: %m");
1079 goto finish;
1080 }
1081
1082 server_rotate(s);
1083 server_vacuum(s, false, false);
1084
1085 if (!s->system_journal) {
1086 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1087 r = -EIO;
1088 goto finish;
1089 }
1090
1091 log_debug("Retrying write.");
1092 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1093 if (r < 0) {
1094 log_error_errno(r, "Can't write entry: %m");
1095 goto finish;
1096 }
1097 }
1098
1099 r = 0;
1100
1101 finish:
1102 journal_file_post_change(s->system_journal);
1103
1104 s->runtime_journal = journal_file_close(s->runtime_journal);
1105
1106 if (r >= 0)
1107 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1108
1109 sd_journal_close(j);
1110
1111 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1112
1113 return r;
1114 }
1115
1116 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1117 Server *s = userdata;
1118 struct ucred *ucred = NULL;
1119 struct timeval *tv = NULL;
1120 struct cmsghdr *cmsg;
1121 char *label = NULL;
1122 size_t label_len = 0, m;
1123 struct iovec iovec;
1124 ssize_t n;
1125 int *fds = NULL, v = 0;
1126 unsigned n_fds = 0;
1127
1128 union {
1129 struct cmsghdr cmsghdr;
1130
1131 /* We use NAME_MAX space for the SELinux label
1132 * here. The kernel currently enforces no
1133 * limit, but according to suggestions from
1134 * the SELinux people this will change and it
1135 * will probably be identical to NAME_MAX. For
1136 * now we use that, but this should be updated
1137 * one day when the final limit is known. */
1138 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1139 CMSG_SPACE(sizeof(struct timeval)) +
1140 CMSG_SPACE(sizeof(int)) + /* fd */
1141 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1142 } control = {};
1143
1144 union sockaddr_union sa = {};
1145
1146 struct msghdr msghdr = {
1147 .msg_iov = &iovec,
1148 .msg_iovlen = 1,
1149 .msg_control = &control,
1150 .msg_controllen = sizeof(control),
1151 .msg_name = &sa,
1152 .msg_namelen = sizeof(sa),
1153 };
1154
1155 assert(s);
1156 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1157
1158 if (revents != EPOLLIN) {
1159 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1160 return -EIO;
1161 }
1162
1163 /* Try to get the right size, if we can. (Not all
1164 * sockets support SIOCINQ, hence we just try, but
1165 * don't rely on it. */
1166 (void) ioctl(fd, SIOCINQ, &v);
1167
1168 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1169 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1170 (size_t) LINE_MAX,
1171 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1172
1173 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1174 return log_oom();
1175
1176 iovec.iov_base = s->buffer;
1177 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1178
1179 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1180 if (n < 0) {
1181 if (errno == EINTR || errno == EAGAIN)
1182 return 0;
1183
1184 return log_error_errno(errno, "recvmsg() failed: %m");
1185 }
1186
1187 CMSG_FOREACH(cmsg, &msghdr) {
1188
1189 if (cmsg->cmsg_level == SOL_SOCKET &&
1190 cmsg->cmsg_type == SCM_CREDENTIALS &&
1191 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1192 ucred = (struct ucred*) CMSG_DATA(cmsg);
1193 else if (cmsg->cmsg_level == SOL_SOCKET &&
1194 cmsg->cmsg_type == SCM_SECURITY) {
1195 label = (char*) CMSG_DATA(cmsg);
1196 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1197 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1198 cmsg->cmsg_type == SO_TIMESTAMP &&
1199 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1200 tv = (struct timeval*) CMSG_DATA(cmsg);
1201 else if (cmsg->cmsg_level == SOL_SOCKET &&
1202 cmsg->cmsg_type == SCM_RIGHTS) {
1203 fds = (int*) CMSG_DATA(cmsg);
1204 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1205 }
1206 }
1207
1208 /* And a trailing NUL, just in case */
1209 s->buffer[n] = 0;
1210
1211 if (fd == s->syslog_fd) {
1212 if (n > 0 && n_fds == 0)
1213 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1214 else if (n_fds > 0)
1215 log_warning("Got file descriptors via syslog socket. Ignoring.");
1216
1217 } else if (fd == s->native_fd) {
1218 if (n > 0 && n_fds == 0)
1219 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1220 else if (n == 0 && n_fds == 1)
1221 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1222 else if (n_fds > 0)
1223 log_warning("Got too many file descriptors via native socket. Ignoring.");
1224
1225 } else {
1226 assert(fd == s->audit_fd);
1227
1228 if (n > 0 && n_fds == 0)
1229 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1230 else if (n_fds > 0)
1231 log_warning("Got file descriptors via audit socket. Ignoring.");
1232 }
1233
1234 close_many(fds, n_fds);
1235 return 0;
1236 }
1237
1238 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1239 Server *s = userdata;
1240 int r;
1241
1242 assert(s);
1243
1244 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1245
1246 server_flush_to_var(s);
1247 server_sync(s);
1248 server_vacuum(s, false, false);
1249
1250 r = touch("/run/systemd/journal/flushed");
1251 if (r < 0)
1252 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1253
1254 return 0;
1255 }
1256
1257 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1258 Server *s = userdata;
1259 int r;
1260
1261 assert(s);
1262
1263 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1264 server_rotate(s);
1265 server_vacuum(s, true, true);
1266
1267 /* Let clients know when the most recent rotation happened. */
1268 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1269 if (r < 0)
1270 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1271
1272 return 0;
1273 }
1274
1275 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1276 Server *s = userdata;
1277
1278 assert(s);
1279
1280 log_received_signal(LOG_INFO, si);
1281
1282 sd_event_exit(s->event, 0);
1283 return 0;
1284 }
1285
1286 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1287 Server *s = userdata;
1288 int r;
1289
1290 assert(s);
1291
1292 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1293
1294 server_sync(s);
1295
1296 /* Let clients know when the most recent sync happened. */
1297 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1298 if (r < 0)
1299 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1300
1301 return 0;
1302 }
1303
1304 static int setup_signals(Server *s) {
1305 int r;
1306
1307 assert(s);
1308
1309 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1310
1311 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1312 if (r < 0)
1313 return r;
1314
1315 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1316 if (r < 0)
1317 return r;
1318
1319 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1320 if (r < 0)
1321 return r;
1322
1323 /* Let's process SIGTERM late, so that we flush all queued
1324 * messages to disk before we exit */
1325 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1326 if (r < 0)
1327 return r;
1328
1329 /* When journald is invoked on the terminal (when debugging),
1330 * it's useful if C-c is handled equivalent to SIGTERM. */
1331 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1332 if (r < 0)
1333 return r;
1334
1335 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1336 if (r < 0)
1337 return r;
1338
1339 /* SIGRTMIN+1 causes an immediate sync. We process this very
1340 * late, so that everything else queued at this point is
1341 * really written to disk. Clients can watch
1342 * /run/systemd/journal/synced with inotify until its mtime
1343 * changes to see when a sync happened. */
1344 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1345 if (r < 0)
1346 return r;
1347
1348 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1349 if (r < 0)
1350 return r;
1351
1352 return 0;
1353 }
1354
1355 static int server_parse_proc_cmdline(Server *s) {
1356 _cleanup_free_ char *line = NULL;
1357 const char *p;
1358 int r;
1359
1360 r = proc_cmdline(&line);
1361 if (r < 0) {
1362 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1363 return 0;
1364 }
1365
1366 p = line;
1367 for(;;) {
1368 _cleanup_free_ char *word = NULL;
1369
1370 r = extract_first_word(&p, &word, NULL, 0);
1371 if (r < 0)
1372 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1373
1374 if (r == 0)
1375 break;
1376
1377 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1378 r = parse_boolean(word + 35);
1379 if (r < 0)
1380 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1381 else
1382 s->forward_to_syslog = r;
1383 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1384 r = parse_boolean(word + 33);
1385 if (r < 0)
1386 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1387 else
1388 s->forward_to_kmsg = r;
1389 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1390 r = parse_boolean(word + 36);
1391 if (r < 0)
1392 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1393 else
1394 s->forward_to_console = r;
1395 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1396 r = parse_boolean(word + 33);
1397 if (r < 0)
1398 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1399 else
1400 s->forward_to_wall = r;
1401 } else if (startswith(word, "systemd.journald"))
1402 log_warning("Invalid systemd.journald parameter. Ignoring.");
1403 }
1404
1405 /* do not warn about state here, since probably systemd already did */
1406 return 0;
1407 }
1408
1409 static int server_parse_config_file(Server *s) {
1410 assert(s);
1411
1412 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1413 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1414 "Journal\0",
1415 config_item_perf_lookup, journald_gperf_lookup,
1416 false, s);
1417 }
1418
1419 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1420 Server *s = userdata;
1421
1422 assert(s);
1423
1424 server_sync(s);
1425 return 0;
1426 }
1427
1428 int server_schedule_sync(Server *s, int priority) {
1429 int r;
1430
1431 assert(s);
1432
1433 if (priority <= LOG_CRIT) {
1434 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1435 server_sync(s);
1436 return 0;
1437 }
1438
1439 if (s->sync_scheduled)
1440 return 0;
1441
1442 if (s->sync_interval_usec > 0) {
1443 usec_t when;
1444
1445 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1446 if (r < 0)
1447 return r;
1448
1449 when += s->sync_interval_usec;
1450
1451 if (!s->sync_event_source) {
1452 r = sd_event_add_time(
1453 s->event,
1454 &s->sync_event_source,
1455 CLOCK_MONOTONIC,
1456 when, 0,
1457 server_dispatch_sync, s);
1458 if (r < 0)
1459 return r;
1460
1461 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1462 } else {
1463 r = sd_event_source_set_time(s->sync_event_source, when);
1464 if (r < 0)
1465 return r;
1466
1467 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1468 }
1469 if (r < 0)
1470 return r;
1471
1472 s->sync_scheduled = true;
1473 }
1474
1475 return 0;
1476 }
1477
1478 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1479 Server *s = userdata;
1480
1481 assert(s);
1482
1483 server_cache_hostname(s);
1484 return 0;
1485 }
1486
1487 static int server_open_hostname(Server *s) {
1488 int r;
1489
1490 assert(s);
1491
1492 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1493 if (s->hostname_fd < 0)
1494 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1495
1496 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1497 if (r < 0) {
1498 /* kernels prior to 3.2 don't support polling this file. Ignore
1499 * the failure. */
1500 if (r == -EPERM) {
1501 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1502 s->hostname_fd = safe_close(s->hostname_fd);
1503 return 0;
1504 }
1505
1506 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1507 }
1508
1509 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1510 if (r < 0)
1511 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1512
1513 return 0;
1514 }
1515
1516 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1517 Server *s = userdata;
1518 int r;
1519
1520 assert(s);
1521 assert(s->notify_event_source == es);
1522 assert(s->notify_fd == fd);
1523
1524 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1525 * message on it. Either it's the wtachdog event, the initial
1526 * READY=1 event or an stdout stream event. If there's nothing
1527 * to write anymore, turn our event source off. The next time
1528 * there's something to send it will be turned on again. */
1529
1530 if (!s->sent_notify_ready) {
1531 static const char p[] =
1532 "READY=1\n"
1533 "STATUS=Processing requests...";
1534 ssize_t l;
1535
1536 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1537 if (l < 0) {
1538 if (errno == EAGAIN)
1539 return 0;
1540
1541 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1542 }
1543
1544 s->sent_notify_ready = true;
1545 log_debug("Sent READY=1 notification.");
1546
1547 } else if (s->send_watchdog) {
1548
1549 static const char p[] =
1550 "WATCHDOG=1";
1551
1552 ssize_t l;
1553
1554 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1555 if (l < 0) {
1556 if (errno == EAGAIN)
1557 return 0;
1558
1559 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1560 }
1561
1562 s->send_watchdog = false;
1563 log_debug("Sent WATCHDOG=1 notification.");
1564
1565 } else if (s->stdout_streams_notify_queue)
1566 /* Dispatch one stream notification event */
1567 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1568
1569 /* Leave us enabled if there's still more to to do. */
1570 if (s->send_watchdog || s->stdout_streams_notify_queue)
1571 return 0;
1572
1573 /* There was nothing to do anymore, let's turn ourselves off. */
1574 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1575 if (r < 0)
1576 return log_error_errno(r, "Failed to turn off notify event source: %m");
1577
1578 return 0;
1579 }
1580
1581 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1582 Server *s = userdata;
1583 int r;
1584
1585 assert(s);
1586
1587 s->send_watchdog = true;
1588
1589 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1590 if (r < 0)
1591 log_warning_errno(r, "Failed to turn on notify event source: %m");
1592
1593 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1594 if (r < 0)
1595 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1596
1597 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1598 if (r < 0)
1599 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1600
1601 return 0;
1602 }
1603
1604 static int server_connect_notify(Server *s) {
1605 union sockaddr_union sa = {
1606 .un.sun_family = AF_UNIX,
1607 };
1608 const char *e;
1609 int r;
1610
1611 assert(s);
1612 assert(s->notify_fd < 0);
1613 assert(!s->notify_event_source);
1614
1615 /*
1616 So here's the problem: we'd like to send notification
1617 messages to PID 1, but we cannot do that via sd_notify(),
1618 since that's synchronous, and we might end up blocking on
1619 it. Specifically: given that PID 1 might block on
1620 dbus-daemon during IPC, and dbus-daemon is logging to us,
1621 and might hence block on us, we might end up in a deadlock
1622 if we block on sending PID 1 notification messages -- by
1623 generating a full blocking circle. To avoid this, let's
1624 create a non-blocking socket, and connect it to the
1625 notification socket, and then wait for POLLOUT before we
1626 send anything. This should efficiently avoid any deadlocks,
1627 as we'll never block on PID 1, hence PID 1 can safely block
1628 on dbus-daemon which can safely block on us again.
1629
1630 Don't think that this issue is real? It is, see:
1631 https://github.com/systemd/systemd/issues/1505
1632 */
1633
1634 e = getenv("NOTIFY_SOCKET");
1635 if (!e)
1636 return 0;
1637
1638 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1639 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1640 return -EINVAL;
1641 }
1642
1643 if (strlen(e) > sizeof(sa.un.sun_path)) {
1644 log_error("NOTIFY_SOCKET path too long: %s", e);
1645 return -EINVAL;
1646 }
1647
1648 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1649 if (s->notify_fd < 0)
1650 return log_error_errno(errno, "Failed to create notify socket: %m");
1651
1652 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1653
1654 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1655 if (sa.un.sun_path[0] == '@')
1656 sa.un.sun_path[0] = 0;
1657
1658 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1659 if (r < 0)
1660 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1661
1662 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1663 if (r < 0)
1664 return log_error_errno(r, "Failed to watch notification socket: %m");
1665
1666 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1667 s->send_watchdog = true;
1668
1669 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1670 if (r < 0)
1671 return log_error_errno(r, "Failed to add watchdog time event: %m");
1672 }
1673
1674 /* This should fire pretty soon, which we'll use to send the
1675 * READY=1 event. */
1676
1677 return 0;
1678 }
1679
1680 int server_init(Server *s) {
1681 _cleanup_fdset_free_ FDSet *fds = NULL;
1682 int n, r, fd;
1683 bool no_sockets;
1684
1685 assert(s);
1686
1687 zero(*s);
1688 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1689 s->compress = true;
1690 s->seal = true;
1691
1692 s->watchdog_usec = USEC_INFINITY;
1693
1694 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1695 s->sync_scheduled = false;
1696
1697 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1698 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1699
1700 s->forward_to_wall = true;
1701
1702 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1703
1704 s->max_level_store = LOG_DEBUG;
1705 s->max_level_syslog = LOG_DEBUG;
1706 s->max_level_kmsg = LOG_NOTICE;
1707 s->max_level_console = LOG_INFO;
1708 s->max_level_wall = LOG_EMERG;
1709
1710 journal_reset_metrics(&s->system_metrics);
1711 journal_reset_metrics(&s->runtime_metrics);
1712
1713 server_parse_config_file(s);
1714 server_parse_proc_cmdline(s);
1715
1716 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1717 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1718 s->rate_limit_interval, s->rate_limit_burst);
1719 s->rate_limit_interval = s->rate_limit_burst = 0;
1720 }
1721
1722 (void) mkdir_p("/run/systemd/journal", 0755);
1723
1724 s->user_journals = ordered_hashmap_new(NULL);
1725 if (!s->user_journals)
1726 return log_oom();
1727
1728 s->mmap = mmap_cache_new();
1729 if (!s->mmap)
1730 return log_oom();
1731
1732 r = sd_event_default(&s->event);
1733 if (r < 0)
1734 return log_error_errno(r, "Failed to create event loop: %m");
1735
1736 n = sd_listen_fds(true);
1737 if (n < 0)
1738 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1739
1740 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1741
1742 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1743
1744 if (s->native_fd >= 0) {
1745 log_error("Too many native sockets passed.");
1746 return -EINVAL;
1747 }
1748
1749 s->native_fd = fd;
1750
1751 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1752
1753 if (s->stdout_fd >= 0) {
1754 log_error("Too many stdout sockets passed.");
1755 return -EINVAL;
1756 }
1757
1758 s->stdout_fd = fd;
1759
1760 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1761 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1762
1763 if (s->syslog_fd >= 0) {
1764 log_error("Too many /dev/log sockets passed.");
1765 return -EINVAL;
1766 }
1767
1768 s->syslog_fd = fd;
1769
1770 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1771
1772 if (s->audit_fd >= 0) {
1773 log_error("Too many audit sockets passed.");
1774 return -EINVAL;
1775 }
1776
1777 s->audit_fd = fd;
1778
1779 } else {
1780
1781 if (!fds) {
1782 fds = fdset_new();
1783 if (!fds)
1784 return log_oom();
1785 }
1786
1787 r = fdset_put(fds, fd);
1788 if (r < 0)
1789 return log_oom();
1790 }
1791 }
1792
1793 /* Try to restore streams, but don't bother if this fails */
1794 (void) server_restore_streams(s, fds);
1795
1796 if (fdset_size(fds) > 0) {
1797 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1798 fds = fdset_free(fds);
1799 }
1800
1801 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1802
1803 /* always open stdout, syslog, native, and kmsg sockets */
1804
1805 /* systemd-journald.socket: /run/systemd/journal/stdout */
1806 r = server_open_stdout_socket(s);
1807 if (r < 0)
1808 return r;
1809
1810 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1811 r = server_open_syslog_socket(s);
1812 if (r < 0)
1813 return r;
1814
1815 /* systemd-journald.socket: /run/systemd/journal/socket */
1816 r = server_open_native_socket(s);
1817 if (r < 0)
1818 return r;
1819
1820 /* /dev/ksmg */
1821 r = server_open_dev_kmsg(s);
1822 if (r < 0)
1823 return r;
1824
1825 /* Unless we got *some* sockets and not audit, open audit socket */
1826 if (s->audit_fd >= 0 || no_sockets) {
1827 r = server_open_audit(s);
1828 if (r < 0)
1829 return r;
1830 }
1831
1832 r = server_open_kernel_seqnum(s);
1833 if (r < 0)
1834 return r;
1835
1836 r = server_open_hostname(s);
1837 if (r < 0)
1838 return r;
1839
1840 r = setup_signals(s);
1841 if (r < 0)
1842 return r;
1843
1844 s->udev = udev_new();
1845 if (!s->udev)
1846 return -ENOMEM;
1847
1848 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1849 if (!s->rate_limit)
1850 return -ENOMEM;
1851
1852 r = cg_get_root_path(&s->cgroup_root);
1853 if (r < 0)
1854 return r;
1855
1856 server_cache_hostname(s);
1857 server_cache_boot_id(s);
1858 server_cache_machine_id(s);
1859
1860 (void) server_connect_notify(s);
1861
1862 return system_journal_open(s, false);
1863 }
1864
1865 void server_maybe_append_tags(Server *s) {
1866 #ifdef HAVE_GCRYPT
1867 JournalFile *f;
1868 Iterator i;
1869 usec_t n;
1870
1871 n = now(CLOCK_REALTIME);
1872
1873 if (s->system_journal)
1874 journal_file_maybe_append_tag(s->system_journal, n);
1875
1876 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1877 journal_file_maybe_append_tag(f, n);
1878 #endif
1879 }
1880
1881 void server_done(Server *s) {
1882 JournalFile *f;
1883 assert(s);
1884
1885 while (s->stdout_streams)
1886 stdout_stream_free(s->stdout_streams);
1887
1888 if (s->system_journal)
1889 journal_file_close(s->system_journal);
1890
1891 if (s->runtime_journal)
1892 journal_file_close(s->runtime_journal);
1893
1894 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1895 journal_file_close(f);
1896
1897 ordered_hashmap_free(s->user_journals);
1898
1899 sd_event_source_unref(s->syslog_event_source);
1900 sd_event_source_unref(s->native_event_source);
1901 sd_event_source_unref(s->stdout_event_source);
1902 sd_event_source_unref(s->dev_kmsg_event_source);
1903 sd_event_source_unref(s->audit_event_source);
1904 sd_event_source_unref(s->sync_event_source);
1905 sd_event_source_unref(s->sigusr1_event_source);
1906 sd_event_source_unref(s->sigusr2_event_source);
1907 sd_event_source_unref(s->sigterm_event_source);
1908 sd_event_source_unref(s->sigint_event_source);
1909 sd_event_source_unref(s->sigrtmin1_event_source);
1910 sd_event_source_unref(s->hostname_event_source);
1911 sd_event_source_unref(s->notify_event_source);
1912 sd_event_source_unref(s->watchdog_event_source);
1913 sd_event_unref(s->event);
1914
1915 safe_close(s->syslog_fd);
1916 safe_close(s->native_fd);
1917 safe_close(s->stdout_fd);
1918 safe_close(s->dev_kmsg_fd);
1919 safe_close(s->audit_fd);
1920 safe_close(s->hostname_fd);
1921 safe_close(s->notify_fd);
1922
1923 if (s->rate_limit)
1924 journal_rate_limit_free(s->rate_limit);
1925
1926 if (s->kernel_seqnum)
1927 munmap(s->kernel_seqnum, sizeof(uint64_t));
1928
1929 free(s->buffer);
1930 free(s->tty_path);
1931 free(s->cgroup_root);
1932 free(s->hostname_field);
1933
1934 if (s->mmap)
1935 mmap_cache_unref(s->mmap);
1936
1937 udev_unref(s->udev);
1938 }
1939
1940 static const char* const storage_table[_STORAGE_MAX] = {
1941 [STORAGE_AUTO] = "auto",
1942 [STORAGE_VOLATILE] = "volatile",
1943 [STORAGE_PERSISTENT] = "persistent",
1944 [STORAGE_NONE] = "none"
1945 };
1946
1947 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1948 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1949
1950 static const char* const split_mode_table[_SPLIT_MAX] = {
1951 [SPLIT_LOGIN] = "login",
1952 [SPLIT_UID] = "uid",
1953 [SPLIT_NONE] = "none",
1954 };
1955
1956 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1957 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");