]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
journalctl: add new --sync switch for syncing the journal to disk
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "formats-util.h"
45 #include "fs-util.h"
46 #include "hashmap.h"
47 #include "hostname-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "missing.h"
61 #include "mkdir.h"
62 #include "parse-util.h"
63 #include "proc-cmdline.h"
64 #include "process-util.h"
65 #include "rm-rf.h"
66 #include "selinux-util.h"
67 #include "signal-util.h"
68 #include "socket-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
71
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes the oldest one once this is reached. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults for tunables (presumably overridable through the
 * configuration file -- the code applying them is outside this section). */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached disk space measurement stays valid before
 * determine_space_for() measures again. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the
 * notification socket -- its user is not visible in this section. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
82
83 static int determine_space_for(
84 Server *s,
85 JournalMetrics *metrics,
86 const char *path,
87 const char *name,
88 bool verbose,
89 bool patch_min_use,
90 uint64_t *available,
91 uint64_t *limit) {
92
93 uint64_t sum = 0, ss_avail, avail;
94 _cleanup_closedir_ DIR *d = NULL;
95 struct dirent *de;
96 struct statvfs ss;
97 const char *p;
98 usec_t ts;
99
100 assert(s);
101 assert(metrics);
102 assert(path);
103 assert(name);
104
105 ts = now(CLOCK_MONOTONIC);
106
107 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
108
109 if (available)
110 *available = s->cached_space_available;
111 if (limit)
112 *limit = s->cached_space_limit;
113
114 return 0;
115 }
116
117 p = strjoina(path, SERVER_MACHINE_ID(s));
118 d = opendir(p);
119 if (!d)
120 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
121
122 if (fstatvfs(dirfd(d), &ss) < 0)
123 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
124
125 FOREACH_DIRENT_ALL(de, d, break) {
126 struct stat st;
127
128 if (!endswith(de->d_name, ".journal") &&
129 !endswith(de->d_name, ".journal~"))
130 continue;
131
132 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
133 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
134 continue;
135 }
136
137 if (!S_ISREG(st.st_mode))
138 continue;
139
140 sum += (uint64_t) st.st_blocks * 512UL;
141 }
142
143 /* If request, then let's bump the min_use limit to the
144 * current usage on disk. We do this when starting up and
145 * first opening the journal files. This way sudden spikes in
146 * disk usage will not cause journald to vacuum files without
147 * bounds. Note that this means that only a restart of
148 * journald will make it reset this value. */
149
150 if (patch_min_use)
151 metrics->min_use = MAX(metrics->min_use, sum);
152
153 ss_avail = ss.f_bsize * ss.f_bavail;
154 avail = LESS_BY(ss_avail, metrics->keep_free);
155
156 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
157 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
158 s->cached_space_timestamp = ts;
159
160 if (verbose) {
161 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
162 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
163
164 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
165 "%s (%s) is currently using %s.\n"
166 "Maximum allowed usage is set to %s.\n"
167 "Leaving at least %s free (of currently available %s of space).\n"
168 "Enforced usage limit is thus %s, of which %s are still available.",
169 name, path,
170 format_bytes(fb1, sizeof(fb1), sum),
171 format_bytes(fb2, sizeof(fb2), metrics->max_use),
172 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
173 format_bytes(fb4, sizeof(fb4), ss_avail),
174 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
175 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
176 }
177
178 if (available)
179 *available = s->cached_space_available;
180 if (limit)
181 *limit = s->cached_space_limit;
182
183 return 1;
184 }
185
186 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
187 JournalMetrics *metrics;
188 const char *path, *name;
189
190 assert(s);
191
192 if (s->system_journal) {
193 path = "/var/log/journal/";
194 metrics = &s->system_metrics;
195 name = "System journal";
196 } else {
197 path = "/run/log/journal/";
198 metrics = &s->runtime_metrics;
199 name = "Runtime journal";
200 }
201
202 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
203 }
204
205 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
206 int r;
207 #ifdef HAVE_ACL
208 _cleanup_(acl_freep) acl_t acl = NULL;
209 acl_entry_t entry;
210 acl_permset_t permset;
211 #endif
212
213 assert(f);
214
215 r = fchmod(f->fd, 0640);
216 if (r < 0)
217 log_warning_errno(errno, "Failed to fix access mode on %s, ignoring: %m", f->path);
218
219 #ifdef HAVE_ACL
220 if (uid <= SYSTEM_UID_MAX)
221 return;
222
223 acl = acl_get_fd(f->fd);
224 if (!acl) {
225 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
226 return;
227 }
228
229 r = acl_find_uid(acl, uid, &entry);
230 if (r <= 0) {
231
232 if (acl_create_entry(&acl, &entry) < 0 ||
233 acl_set_tag_type(entry, ACL_USER) < 0 ||
234 acl_set_qualifier(entry, &uid) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236 return;
237 }
238 }
239
240 /* We do not recalculate the mask unconditionally here,
241 * so that the fchmod() mask above stays intact. */
242 if (acl_get_permset(entry, &permset) < 0 ||
243 acl_add_perm(permset, ACL_READ) < 0) {
244 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
245 return;
246 }
247
248 r = calc_acl_mask_if_needed(&acl);
249 if (r < 0) {
250 log_warning_errno(r, "Failed to patch ACL on %s, ignoring: %m", f->path);
251 return;
252 }
253
254 if (acl_set_fd(f->fd, acl) < 0)
255 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
256
257 #endif
258 }
259
260 static JournalFile* find_journal(Server *s, uid_t uid) {
261 _cleanup_free_ char *p = NULL;
262 int r;
263 JournalFile *f;
264 sd_id128_t machine;
265
266 assert(s);
267
268 /* We split up user logs only on /var, not on /run. If the
269 * runtime file is open, we write to it exclusively, in order
270 * to guarantee proper order as soon as we flush /run to
271 * /var and close the runtime file. */
272
273 if (s->runtime_journal)
274 return s->runtime_journal;
275
276 if (uid <= SYSTEM_UID_MAX)
277 return s->system_journal;
278
279 r = sd_id128_get_machine(&machine);
280 if (r < 0)
281 return s->system_journal;
282
283 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
284 if (f)
285 return f;
286
287 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
288 SD_ID128_FORMAT_VAL(machine), uid) < 0)
289 return s->system_journal;
290
291 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
292 /* Too many open? Then let's close one */
293 f = ordered_hashmap_steal_first(s->user_journals);
294 assert(f);
295 journal_file_close(f);
296 }
297
298 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
299 if (r < 0)
300 return s->system_journal;
301
302 server_fix_perms(s, f, uid);
303
304 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
305 if (r < 0) {
306 journal_file_close(f);
307 return s->system_journal;
308 }
309
310 return f;
311 }
312
313 static int do_rotate(
314 Server *s,
315 JournalFile **f,
316 const char* name,
317 bool seal,
318 uint32_t uid) {
319
320 int r;
321 assert(s);
322
323 if (!*f)
324 return -EINVAL;
325
326 r = journal_file_rotate(f, s->compress, seal);
327 if (r < 0)
328 if (*f)
329 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
330 else
331 log_error_errno(r, "Failed to create new %s journal: %m", name);
332 else
333 server_fix_perms(s, *f, uid);
334
335 return r;
336 }
337
338 void server_rotate(Server *s) {
339 JournalFile *f;
340 void *k;
341 Iterator i;
342 int r;
343
344 log_debug("Rotating...");
345
346 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
347 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
348
349 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
351 if (r >= 0)
352 ordered_hashmap_replace(s->user_journals, k, f);
353 else if (!f)
354 /* Old file has been closed and deallocated */
355 ordered_hashmap_remove(s->user_journals, k);
356 }
357 }
358
359 void server_sync(Server *s) {
360 JournalFile *f;
361 void *k;
362 Iterator i;
363 int r;
364
365 if (s->system_journal) {
366 r = journal_file_set_offline(s->system_journal);
367 if (r < 0)
368 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
369 }
370
371 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
372 r = journal_file_set_offline(f);
373 if (r < 0)
374 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
375 }
376
377 if (s->sync_event_source) {
378 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
379 if (r < 0)
380 log_error_errno(r, "Failed to disable sync timer source: %m");
381 }
382
383 s->sync_scheduled = false;
384 }
385
386 static void do_vacuum(
387 Server *s,
388 JournalFile *f,
389 JournalMetrics *metrics,
390 const char *path,
391 const char *name,
392 bool verbose,
393 bool patch_min_use) {
394
395 const char *p;
396 uint64_t limit;
397 int r;
398
399 assert(s);
400 assert(metrics);
401 assert(path);
402 assert(name);
403
404 if (!f)
405 return;
406
407 p = strjoina(path, SERVER_MACHINE_ID(s));
408
409 limit = metrics->max_use;
410 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
411
412 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
413 if (r < 0 && r != -ENOENT)
414 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
415 }
416
417 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
418 assert(s);
419
420 log_debug("Vacuuming...");
421
422 s->oldest_file_usec = 0;
423
424 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
425 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
426
427 s->cached_space_limit = 0;
428 s->cached_space_available = 0;
429 s->cached_space_timestamp = 0;
430
431 return 0;
432 }
433
434 static void server_cache_machine_id(Server *s) {
435 sd_id128_t id;
436 int r;
437
438 assert(s);
439
440 r = sd_id128_get_machine(&id);
441 if (r < 0)
442 return;
443
444 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
445 }
446
447 static void server_cache_boot_id(Server *s) {
448 sd_id128_t id;
449 int r;
450
451 assert(s);
452
453 r = sd_id128_get_boot(&id);
454 if (r < 0)
455 return;
456
457 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
458 }
459
460 static void server_cache_hostname(Server *s) {
461 _cleanup_free_ char *t = NULL;
462 char *x;
463
464 assert(s);
465
466 t = gethostname_malloc();
467 if (!t)
468 return;
469
470 x = strappend("_HOSTNAME=", t);
471 if (!x)
472 return;
473
474 free(s->hostname_field);
475 s->hostname_field = x;
476 }
477
478 static bool shall_try_append_again(JournalFile *f, int r) {
479
480 /* -E2BIG Hit configured limit
481 -EFBIG Hit fs limit
482 -EDQUOT Quota limit hit
483 -ENOSPC Disk full
484 -EIO I/O error of some kind (mmap)
485 -EHOSTDOWN Other machine
486 -EBUSY Unclean shutdown
487 -EPROTONOSUPPORT Unsupported feature
488 -EBADMSG Corrupted
489 -ENODATA Truncated
490 -ESHUTDOWN Already archived
491 -EIDRM Journal file has been deleted */
492
493 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
494 log_debug("%s: Allocation limit reached, rotating.", f->path);
495 else if (r == -EHOSTDOWN)
496 log_info("%s: Journal file from other machine, rotating.", f->path);
497 else if (r == -EBUSY)
498 log_info("%s: Unclean shutdown, rotating.", f->path);
499 else if (r == -EPROTONOSUPPORT)
500 log_info("%s: Unsupported feature, rotating.", f->path);
501 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
502 log_warning("%s: Journal file corrupted, rotating.", f->path);
503 else if (r == -EIO)
504 log_warning("%s: IO error, rotating.", f->path);
505 else if (r == -EIDRM)
506 log_warning("%s: Journal file has been deleted, rotating.", f->path);
507 else
508 return false;
509
510 return true;
511 }
512
513 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
514 JournalFile *f;
515 bool vacuumed = false;
516 int r;
517
518 assert(s);
519 assert(iovec);
520 assert(n > 0);
521
522 f = find_journal(s, uid);
523 if (!f)
524 return;
525
526 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
527 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
528 server_rotate(s);
529 server_vacuum(s, false, false);
530 vacuumed = true;
531
532 f = find_journal(s, uid);
533 if (!f)
534 return;
535 }
536
537 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
538 if (r >= 0) {
539 server_schedule_sync(s, priority);
540 return;
541 }
542
543 if (vacuumed || !shall_try_append_again(f, r)) {
544 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
545 return;
546 }
547
548 server_rotate(s);
549 server_vacuum(s, false, false);
550
551 f = find_journal(s, uid);
552 if (!f)
553 return;
554
555 log_debug("Retrying write.");
556 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
557 if (r < 0)
558 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
559 else
560 server_schedule_sync(s, priority);
561 }
562
563 static void dispatch_message_real(
564 Server *s,
565 struct iovec *iovec, unsigned n, unsigned m,
566 const struct ucred *ucred,
567 const struct timeval *tv,
568 const char *label, size_t label_len,
569 const char *unit_id,
570 int priority,
571 pid_t object_pid) {
572
573 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
574 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
575 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
576 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
577 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
578 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
579 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
580 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
581 uid_t object_uid;
582 gid_t object_gid;
583 char *x;
584 int r;
585 char *t, *c;
586 uid_t realuid = 0, owner = 0, journal_uid;
587 bool owner_valid = false;
588 #ifdef HAVE_AUDIT
589 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
590 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
591 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
592 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
593
594 uint32_t audit;
595 uid_t loginuid;
596 #endif
597
598 assert(s);
599 assert(iovec);
600 assert(n > 0);
601 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
602
603 if (ucred) {
604 realuid = ucred->uid;
605
606 sprintf(pid, "_PID="PID_FMT, ucred->pid);
607 IOVEC_SET_STRING(iovec[n++], pid);
608
609 sprintf(uid, "_UID="UID_FMT, ucred->uid);
610 IOVEC_SET_STRING(iovec[n++], uid);
611
612 sprintf(gid, "_GID="GID_FMT, ucred->gid);
613 IOVEC_SET_STRING(iovec[n++], gid);
614
615 r = get_process_comm(ucred->pid, &t);
616 if (r >= 0) {
617 x = strjoina("_COMM=", t);
618 free(t);
619 IOVEC_SET_STRING(iovec[n++], x);
620 }
621
622 r = get_process_exe(ucred->pid, &t);
623 if (r >= 0) {
624 x = strjoina("_EXE=", t);
625 free(t);
626 IOVEC_SET_STRING(iovec[n++], x);
627 }
628
629 r = get_process_cmdline(ucred->pid, 0, false, &t);
630 if (r >= 0) {
631 x = strjoina("_CMDLINE=", t);
632 free(t);
633 IOVEC_SET_STRING(iovec[n++], x);
634 }
635
636 r = get_process_capeff(ucred->pid, &t);
637 if (r >= 0) {
638 x = strjoina("_CAP_EFFECTIVE=", t);
639 free(t);
640 IOVEC_SET_STRING(iovec[n++], x);
641 }
642
643 #ifdef HAVE_AUDIT
644 r = audit_session_from_pid(ucred->pid, &audit);
645 if (r >= 0) {
646 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
647 IOVEC_SET_STRING(iovec[n++], audit_session);
648 }
649
650 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
651 if (r >= 0) {
652 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
653 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
654 }
655 #endif
656
657 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
658 if (r >= 0) {
659 char *session = NULL;
660
661 x = strjoina("_SYSTEMD_CGROUP=", c);
662 IOVEC_SET_STRING(iovec[n++], x);
663
664 r = cg_path_get_session(c, &t);
665 if (r >= 0) {
666 session = strjoina("_SYSTEMD_SESSION=", t);
667 free(t);
668 IOVEC_SET_STRING(iovec[n++], session);
669 }
670
671 if (cg_path_get_owner_uid(c, &owner) >= 0) {
672 owner_valid = true;
673
674 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
675 IOVEC_SET_STRING(iovec[n++], owner_uid);
676 }
677
678 if (cg_path_get_unit(c, &t) >= 0) {
679 x = strjoina("_SYSTEMD_UNIT=", t);
680 free(t);
681 IOVEC_SET_STRING(iovec[n++], x);
682 } else if (unit_id && !session) {
683 x = strjoina("_SYSTEMD_UNIT=", unit_id);
684 IOVEC_SET_STRING(iovec[n++], x);
685 }
686
687 if (cg_path_get_user_unit(c, &t) >= 0) {
688 x = strjoina("_SYSTEMD_USER_UNIT=", t);
689 free(t);
690 IOVEC_SET_STRING(iovec[n++], x);
691 } else if (unit_id && session) {
692 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
693 IOVEC_SET_STRING(iovec[n++], x);
694 }
695
696 if (cg_path_get_slice(c, &t) >= 0) {
697 x = strjoina("_SYSTEMD_SLICE=", t);
698 free(t);
699 IOVEC_SET_STRING(iovec[n++], x);
700 }
701
702 free(c);
703 } else if (unit_id) {
704 x = strjoina("_SYSTEMD_UNIT=", unit_id);
705 IOVEC_SET_STRING(iovec[n++], x);
706 }
707
708 #ifdef HAVE_SELINUX
709 if (mac_selinux_use()) {
710 if (label) {
711 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
712
713 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
714 IOVEC_SET_STRING(iovec[n++], x);
715 } else {
716 security_context_t con;
717
718 if (getpidcon(ucred->pid, &con) >= 0) {
719 x = strjoina("_SELINUX_CONTEXT=", con);
720
721 freecon(con);
722 IOVEC_SET_STRING(iovec[n++], x);
723 }
724 }
725 }
726 #endif
727 }
728 assert(n <= m);
729
730 if (object_pid) {
731 r = get_process_uid(object_pid, &object_uid);
732 if (r >= 0) {
733 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
734 IOVEC_SET_STRING(iovec[n++], o_uid);
735 }
736
737 r = get_process_gid(object_pid, &object_gid);
738 if (r >= 0) {
739 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
740 IOVEC_SET_STRING(iovec[n++], o_gid);
741 }
742
743 r = get_process_comm(object_pid, &t);
744 if (r >= 0) {
745 x = strjoina("OBJECT_COMM=", t);
746 free(t);
747 IOVEC_SET_STRING(iovec[n++], x);
748 }
749
750 r = get_process_exe(object_pid, &t);
751 if (r >= 0) {
752 x = strjoina("OBJECT_EXE=", t);
753 free(t);
754 IOVEC_SET_STRING(iovec[n++], x);
755 }
756
757 r = get_process_cmdline(object_pid, 0, false, &t);
758 if (r >= 0) {
759 x = strjoina("OBJECT_CMDLINE=", t);
760 free(t);
761 IOVEC_SET_STRING(iovec[n++], x);
762 }
763
764 #ifdef HAVE_AUDIT
765 r = audit_session_from_pid(object_pid, &audit);
766 if (r >= 0) {
767 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
768 IOVEC_SET_STRING(iovec[n++], o_audit_session);
769 }
770
771 r = audit_loginuid_from_pid(object_pid, &loginuid);
772 if (r >= 0) {
773 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
774 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
775 }
776 #endif
777
778 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
779 if (r >= 0) {
780 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
781 IOVEC_SET_STRING(iovec[n++], x);
782
783 r = cg_path_get_session(c, &t);
784 if (r >= 0) {
785 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
786 free(t);
787 IOVEC_SET_STRING(iovec[n++], x);
788 }
789
790 if (cg_path_get_owner_uid(c, &owner) >= 0) {
791 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
792 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
793 }
794
795 if (cg_path_get_unit(c, &t) >= 0) {
796 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
797 free(t);
798 IOVEC_SET_STRING(iovec[n++], x);
799 }
800
801 if (cg_path_get_user_unit(c, &t) >= 0) {
802 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
803 free(t);
804 IOVEC_SET_STRING(iovec[n++], x);
805 }
806
807 free(c);
808 }
809 }
810 assert(n <= m);
811
812 if (tv) {
813 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
814 IOVEC_SET_STRING(iovec[n++], source_time);
815 }
816
817 /* Note that strictly speaking storing the boot id here is
818 * redundant since the entry includes this in-line
819 * anyway. However, we need this indexed, too. */
820 if (!isempty(s->boot_id_field))
821 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
822
823 if (!isempty(s->machine_id_field))
824 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
825
826 if (!isempty(s->hostname_field))
827 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
828
829 assert(n <= m);
830
831 if (s->split_mode == SPLIT_UID && realuid > 0)
832 /* Split up strictly by any UID */
833 journal_uid = realuid;
834 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
835 /* Split up by login UIDs. We do this only if the
836 * realuid is not root, in order not to accidentally
837 * leak privileged information to the user that is
838 * logged by a privileged process that is part of an
839 * unprivileged session. */
840 journal_uid = owner;
841 else
842 journal_uid = 0;
843
844 write_to_journal(s, journal_uid, iovec, n, priority);
845 }
846
847 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
848 char mid[11 + 32 + 1];
849 char buffer[16 + LINE_MAX + 1];
850 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
851 int n = 0;
852 va_list ap;
853 struct ucred ucred = {};
854
855 assert(s);
856 assert(format);
857
858 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
859 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
860
861 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
862 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
863
864 memcpy(buffer, "MESSAGE=", 8);
865 va_start(ap, format);
866 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
867 va_end(ap);
868 IOVEC_SET_STRING(iovec[n++], buffer);
869
870 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
871 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
872 IOVEC_SET_STRING(iovec[n++], mid);
873 }
874
875 ucred.pid = getpid();
876 ucred.uid = getuid();
877 ucred.gid = getgid();
878
879 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
880 }
881
882 void server_dispatch_message(
883 Server *s,
884 struct iovec *iovec, unsigned n, unsigned m,
885 const struct ucred *ucred,
886 const struct timeval *tv,
887 const char *label, size_t label_len,
888 const char *unit_id,
889 int priority,
890 pid_t object_pid) {
891
892 int rl, r;
893 _cleanup_free_ char *path = NULL;
894 uint64_t available = 0;
895 char *c;
896
897 assert(s);
898 assert(iovec || n == 0);
899
900 if (n == 0)
901 return;
902
903 if (LOG_PRI(priority) > s->max_level_store)
904 return;
905
906 /* Stop early in case the information will not be stored
907 * in a journal. */
908 if (s->storage == STORAGE_NONE)
909 return;
910
911 if (!ucred)
912 goto finish;
913
914 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
915 if (r < 0)
916 goto finish;
917
918 /* example: /user/lennart/3/foobar
919 * /system/dbus.service/foobar
920 *
921 * So let's cut of everything past the third /, since that is
922 * where user directories start */
923
924 c = strchr(path, '/');
925 if (c) {
926 c = strchr(c+1, '/');
927 if (c) {
928 c = strchr(c+1, '/');
929 if (c)
930 *c = 0;
931 }
932 }
933
934 (void) determine_space(s, false, false, &available, NULL);
935 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
936 if (rl == 0)
937 return;
938
939 /* Write a suppression message if we suppressed something */
940 if (rl > 1)
941 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
942 "Suppressed %u messages from %s", rl - 1, path);
943
944 finish:
945 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
946 }
947
948
949 static int system_journal_open(Server *s, bool flush_requested) {
950 const char *fn;
951 int r = 0;
952
953 if (!s->system_journal &&
954 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
955 (flush_requested
956 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
957
958 /* If in auto mode: first try to create the machine
959 * path, but not the prefix.
960 *
961 * If in persistent mode: create /var/log/journal and
962 * the machine path */
963
964 if (s->storage == STORAGE_PERSISTENT)
965 (void) mkdir_p("/var/log/journal/", 0755);
966
967 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
968 (void) mkdir(fn, 0755);
969
970 fn = strjoina(fn, "/system.journal");
971 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
972 if (r >= 0) {
973 server_fix_perms(s, s->system_journal, 0);
974 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
975 } else if (r < 0) {
976 if (r != -ENOENT && r != -EROFS)
977 log_warning_errno(r, "Failed to open system journal: %m");
978
979 r = 0;
980 }
981 }
982
983 if (!s->runtime_journal &&
984 (s->storage != STORAGE_NONE)) {
985
986 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
987
988 if (s->system_journal) {
989
990 /* Try to open the runtime journal, but only
991 * if it already exists, so that we can flush
992 * it into the system journal */
993
994 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
995 if (r < 0) {
996 if (r != -ENOENT)
997 log_warning_errno(r, "Failed to open runtime journal: %m");
998
999 r = 0;
1000 }
1001
1002 } else {
1003
1004 /* OK, we really need the runtime journal, so create
1005 * it if necessary. */
1006
1007 (void) mkdir("/run/log", 0755);
1008 (void) mkdir("/run/log/journal", 0755);
1009 (void) mkdir_parents(fn, 0750);
1010
1011 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1012 if (r < 0)
1013 return log_error_errno(r, "Failed to open runtime journal: %m");
1014 }
1015
1016 if (s->runtime_journal) {
1017 server_fix_perms(s, s->runtime_journal, 0);
1018 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1019 }
1020 }
1021
1022 return r;
1023 }
1024
1025 int server_flush_to_var(Server *s) {
1026 sd_id128_t machine;
1027 sd_journal *j = NULL;
1028 char ts[FORMAT_TIMESPAN_MAX];
1029 usec_t start;
1030 unsigned n = 0;
1031 int r;
1032
1033 assert(s);
1034
1035 if (s->storage != STORAGE_AUTO &&
1036 s->storage != STORAGE_PERSISTENT)
1037 return 0;
1038
1039 if (!s->runtime_journal)
1040 return 0;
1041
1042 (void) system_journal_open(s, true);
1043
1044 if (!s->system_journal)
1045 return 0;
1046
1047 log_debug("Flushing to /var...");
1048
1049 start = now(CLOCK_MONOTONIC);
1050
1051 r = sd_id128_get_machine(&machine);
1052 if (r < 0)
1053 return r;
1054
1055 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1056 if (r < 0)
1057 return log_error_errno(r, "Failed to read runtime journal: %m");
1058
1059 sd_journal_set_data_threshold(j, 0);
1060
1061 SD_JOURNAL_FOREACH(j) {
1062 Object *o = NULL;
1063 JournalFile *f;
1064
1065 f = j->current_file;
1066 assert(f && f->current_offset > 0);
1067
1068 n++;
1069
1070 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1071 if (r < 0) {
1072 log_error_errno(r, "Can't read entry: %m");
1073 goto finish;
1074 }
1075
1076 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1077 if (r >= 0)
1078 continue;
1079
1080 if (!shall_try_append_again(s->system_journal, r)) {
1081 log_error_errno(r, "Can't write entry: %m");
1082 goto finish;
1083 }
1084
1085 server_rotate(s);
1086 server_vacuum(s, false, false);
1087
1088 if (!s->system_journal) {
1089 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1090 r = -EIO;
1091 goto finish;
1092 }
1093
1094 log_debug("Retrying write.");
1095 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1096 if (r < 0) {
1097 log_error_errno(r, "Can't write entry: %m");
1098 goto finish;
1099 }
1100 }
1101
1102 r = 0;
1103
1104 finish:
1105 journal_file_post_change(s->system_journal);
1106
1107 s->runtime_journal = journal_file_close(s->runtime_journal);
1108
1109 if (r >= 0)
1110 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1111
1112 sd_journal_close(j);
1113
1114 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1115
1116 return r;
1117 }
1118
1119 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1120 Server *s = userdata;
1121 struct ucred *ucred = NULL;
1122 struct timeval *tv = NULL;
1123 struct cmsghdr *cmsg;
1124 char *label = NULL;
1125 size_t label_len = 0, m;
1126 struct iovec iovec;
1127 ssize_t n;
1128 int *fds = NULL, v = 0;
1129 unsigned n_fds = 0;
1130
1131 union {
1132 struct cmsghdr cmsghdr;
1133
1134 /* We use NAME_MAX space for the SELinux label
1135 * here. The kernel currently enforces no
1136 * limit, but according to suggestions from
1137 * the SELinux people this will change and it
1138 * will probably be identical to NAME_MAX. For
1139 * now we use that, but this should be updated
1140 * one day when the final limit is known. */
1141 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1142 CMSG_SPACE(sizeof(struct timeval)) +
1143 CMSG_SPACE(sizeof(int)) + /* fd */
1144 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1145 } control = {};
1146
1147 union sockaddr_union sa = {};
1148
1149 struct msghdr msghdr = {
1150 .msg_iov = &iovec,
1151 .msg_iovlen = 1,
1152 .msg_control = &control,
1153 .msg_controllen = sizeof(control),
1154 .msg_name = &sa,
1155 .msg_namelen = sizeof(sa),
1156 };
1157
1158 assert(s);
1159 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1160
1161 if (revents != EPOLLIN) {
1162 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1163 return -EIO;
1164 }
1165
1166 /* Try to get the right size, if we can. (Not all
1167 * sockets support SIOCINQ, hence we just try, but
1168 * don't rely on it. */
1169 (void) ioctl(fd, SIOCINQ, &v);
1170
1171 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1172 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1173 (size_t) LINE_MAX,
1174 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1175
1176 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1177 return log_oom();
1178
1179 iovec.iov_base = s->buffer;
1180 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1181
1182 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1183 if (n < 0) {
1184 if (errno == EINTR || errno == EAGAIN)
1185 return 0;
1186
1187 return log_error_errno(errno, "recvmsg() failed: %m");
1188 }
1189
1190 CMSG_FOREACH(cmsg, &msghdr) {
1191
1192 if (cmsg->cmsg_level == SOL_SOCKET &&
1193 cmsg->cmsg_type == SCM_CREDENTIALS &&
1194 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1195 ucred = (struct ucred*) CMSG_DATA(cmsg);
1196 else if (cmsg->cmsg_level == SOL_SOCKET &&
1197 cmsg->cmsg_type == SCM_SECURITY) {
1198 label = (char*) CMSG_DATA(cmsg);
1199 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1200 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1201 cmsg->cmsg_type == SO_TIMESTAMP &&
1202 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1203 tv = (struct timeval*) CMSG_DATA(cmsg);
1204 else if (cmsg->cmsg_level == SOL_SOCKET &&
1205 cmsg->cmsg_type == SCM_RIGHTS) {
1206 fds = (int*) CMSG_DATA(cmsg);
1207 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1208 }
1209 }
1210
1211 /* And a trailing NUL, just in case */
1212 s->buffer[n] = 0;
1213
1214 if (fd == s->syslog_fd) {
1215 if (n > 0 && n_fds == 0)
1216 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1217 else if (n_fds > 0)
1218 log_warning("Got file descriptors via syslog socket. Ignoring.");
1219
1220 } else if (fd == s->native_fd) {
1221 if (n > 0 && n_fds == 0)
1222 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1223 else if (n == 0 && n_fds == 1)
1224 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1225 else if (n_fds > 0)
1226 log_warning("Got too many file descriptors via native socket. Ignoring.");
1227
1228 } else {
1229 assert(fd == s->audit_fd);
1230
1231 if (n > 0 && n_fds == 0)
1232 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1233 else if (n_fds > 0)
1234 log_warning("Got file descriptors via audit socket. Ignoring.");
1235 }
1236
1237 close_many(fds, n_fds);
1238 return 0;
1239 }
1240
1241 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1242 Server *s = userdata;
1243
1244 assert(s);
1245
1246 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1247
1248 server_flush_to_var(s);
1249 server_sync(s);
1250 server_vacuum(s, false, false);
1251
1252 (void) touch("/run/systemd/journal/flushed");
1253
1254 return 0;
1255 }
1256
1257 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1258 Server *s = userdata;
1259
1260 assert(s);
1261
1262 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1263 server_rotate(s);
1264 server_vacuum(s, true, true);
1265
1266 return 0;
1267 }
1268
1269 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1270 Server *s = userdata;
1271
1272 assert(s);
1273
1274 log_received_signal(LOG_INFO, si);
1275
1276 sd_event_exit(s->event, 0);
1277 return 0;
1278 }
1279
1280 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1281 Server *s = userdata;
1282
1283 assert(s);
1284
1285 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1286
1287 server_sync(s);
1288
1289 /* Let clients know when the most recent sync happened. */
1290 (void) touch("/run/systemd/journal/synced");
1291
1292 return 0;
1293 }
1294
1295 static int setup_signals(Server *s) {
1296 int r;
1297
1298 assert(s);
1299
1300 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1301
1302 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1303 if (r < 0)
1304 return r;
1305
1306 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1307 if (r < 0)
1308 return r;
1309
1310 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1311 if (r < 0)
1312 return r;
1313
1314 /* Let's process SIGTERM late, so that we flush all queued
1315 * messages to disk before we exit */
1316 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1317 if (r < 0)
1318 return r;
1319
1320 /* When journald is invoked on the terminal (when debugging),
1321 * it's useful if C-c is handled equivalent to SIGTERM. */
1322 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1323 if (r < 0)
1324 return r;
1325
1326 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1327 if (r < 0)
1328 return r;
1329
1330 /* SIGRTMIN+1 causes an immediate sync. We process this very
1331 * late, so that everything else queued at this point is
1332 * really written to disk. Clients can watch
1333 * /run/systemd/journal/synced with inotify until its mtime
1334 * changes to see when a sync happened. */
1335 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1336 if (r < 0)
1337 return r;
1338
1339 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1340 if (r < 0)
1341 return r;
1342
1343 return 0;
1344 }
1345
1346 static int server_parse_proc_cmdline(Server *s) {
1347 _cleanup_free_ char *line = NULL;
1348 const char *p;
1349 int r;
1350
1351 r = proc_cmdline(&line);
1352 if (r < 0) {
1353 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1354 return 0;
1355 }
1356
1357 p = line;
1358 for(;;) {
1359 _cleanup_free_ char *word;
1360
1361 r = extract_first_word(&p, &word, NULL, 0);
1362 if (r < 0)
1363 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1364
1365 if (r == 0)
1366 break;
1367
1368 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1369 r = parse_boolean(word + 35);
1370 if (r < 0)
1371 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1372 else
1373 s->forward_to_syslog = r;
1374 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1375 r = parse_boolean(word + 33);
1376 if (r < 0)
1377 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1378 else
1379 s->forward_to_kmsg = r;
1380 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1381 r = parse_boolean(word + 36);
1382 if (r < 0)
1383 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1384 else
1385 s->forward_to_console = r;
1386 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1387 r = parse_boolean(word + 33);
1388 if (r < 0)
1389 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1390 else
1391 s->forward_to_wall = r;
1392 } else if (startswith(word, "systemd.journald"))
1393 log_warning("Invalid systemd.journald parameter. Ignoring.");
1394 }
1395
1396 /* do not warn about state here, since probably systemd already did */
1397 return 0;
1398 }
1399
1400 static int server_parse_config_file(Server *s) {
1401 assert(s);
1402
1403 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1404 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1405 "Journal\0",
1406 config_item_perf_lookup, journald_gperf_lookup,
1407 false, s);
1408 }
1409
1410 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1411 Server *s = userdata;
1412
1413 assert(s);
1414
1415 server_sync(s);
1416 return 0;
1417 }
1418
1419 int server_schedule_sync(Server *s, int priority) {
1420 int r;
1421
1422 assert(s);
1423
1424 if (priority <= LOG_CRIT) {
1425 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1426 server_sync(s);
1427 return 0;
1428 }
1429
1430 if (s->sync_scheduled)
1431 return 0;
1432
1433 if (s->sync_interval_usec > 0) {
1434 usec_t when;
1435
1436 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1437 if (r < 0)
1438 return r;
1439
1440 when += s->sync_interval_usec;
1441
1442 if (!s->sync_event_source) {
1443 r = sd_event_add_time(
1444 s->event,
1445 &s->sync_event_source,
1446 CLOCK_MONOTONIC,
1447 when, 0,
1448 server_dispatch_sync, s);
1449 if (r < 0)
1450 return r;
1451
1452 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1453 } else {
1454 r = sd_event_source_set_time(s->sync_event_source, when);
1455 if (r < 0)
1456 return r;
1457
1458 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1459 }
1460 if (r < 0)
1461 return r;
1462
1463 s->sync_scheduled = true;
1464 }
1465
1466 return 0;
1467 }
1468
1469 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1470 Server *s = userdata;
1471
1472 assert(s);
1473
1474 server_cache_hostname(s);
1475 return 0;
1476 }
1477
1478 static int server_open_hostname(Server *s) {
1479 int r;
1480
1481 assert(s);
1482
1483 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1484 if (s->hostname_fd < 0)
1485 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1486
1487 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1488 if (r < 0) {
1489 /* kernels prior to 3.2 don't support polling this file. Ignore
1490 * the failure. */
1491 if (r == -EPERM) {
1492 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1493 s->hostname_fd = safe_close(s->hostname_fd);
1494 return 0;
1495 }
1496
1497 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1498 }
1499
1500 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1501 if (r < 0)
1502 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1503
1504 return 0;
1505 }
1506
1507 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1508 Server *s = userdata;
1509 int r;
1510
1511 assert(s);
1512 assert(s->notify_event_source == es);
1513 assert(s->notify_fd == fd);
1514
1515 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1516 * message on it. Either it's the wtachdog event, the initial
1517 * READY=1 event or an stdout stream event. If there's nothing
1518 * to write anymore, turn our event source off. The next time
1519 * there's something to send it will be turned on again. */
1520
1521 if (!s->sent_notify_ready) {
1522 static const char p[] =
1523 "READY=1\n"
1524 "STATUS=Processing requests...";
1525 ssize_t l;
1526
1527 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1528 if (l < 0) {
1529 if (errno == EAGAIN)
1530 return 0;
1531
1532 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1533 }
1534
1535 s->sent_notify_ready = true;
1536 log_debug("Sent READY=1 notification.");
1537
1538 } else if (s->send_watchdog) {
1539
1540 static const char p[] =
1541 "WATCHDOG=1";
1542
1543 ssize_t l;
1544
1545 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1546 if (l < 0) {
1547 if (errno == EAGAIN)
1548 return 0;
1549
1550 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1551 }
1552
1553 s->send_watchdog = false;
1554 log_debug("Sent WATCHDOG=1 notification.");
1555
1556 } else if (s->stdout_streams_notify_queue)
1557 /* Dispatch one stream notification event */
1558 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1559
1560 /* Leave us enabled if there's still more to to do. */
1561 if (s->send_watchdog || s->stdout_streams_notify_queue)
1562 return 0;
1563
1564 /* There was nothing to do anymore, let's turn ourselves off. */
1565 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1566 if (r < 0)
1567 return log_error_errno(r, "Failed to turn off notify event source: %m");
1568
1569 return 0;
1570 }
1571
1572 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1573 Server *s = userdata;
1574 int r;
1575
1576 assert(s);
1577
1578 s->send_watchdog = true;
1579
1580 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1581 if (r < 0)
1582 log_warning_errno(r, "Failed to turn on notify event source: %m");
1583
1584 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1585 if (r < 0)
1586 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1587
1588 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1589 if (r < 0)
1590 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1591
1592 return 0;
1593 }
1594
1595 static int server_connect_notify(Server *s) {
1596 union sockaddr_union sa = {
1597 .un.sun_family = AF_UNIX,
1598 };
1599 const char *e;
1600 int r;
1601
1602 assert(s);
1603 assert(s->notify_fd < 0);
1604 assert(!s->notify_event_source);
1605
1606 /*
1607 So here's the problem: we'd like to send notification
1608 messages to PID 1, but we cannot do that via sd_notify(),
1609 since that's synchronous, and we might end up blocking on
1610 it. Specifically: given that PID 1 might block on
1611 dbus-daemon during IPC, and dbus-daemon is logging to us,
1612 and might hence block on us, we might end up in a deadlock
1613 if we block on sending PID 1 notification messages -- by
1614 generating a full blocking circle. To avoid this, let's
1615 create a non-blocking socket, and connect it to the
1616 notification socket, and then wait for POLLOUT before we
1617 send anything. This should efficiently avoid any deadlocks,
1618 as we'll never block on PID 1, hence PID 1 can safely block
1619 on dbus-daemon which can safely block on us again.
1620
1621 Don't think that this issue is real? It is, see:
1622 https://github.com/systemd/systemd/issues/1505
1623 */
1624
1625 e = getenv("NOTIFY_SOCKET");
1626 if (!e)
1627 return 0;
1628
1629 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1630 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1631 return -EINVAL;
1632 }
1633
1634 if (strlen(e) > sizeof(sa.un.sun_path)) {
1635 log_error("NOTIFY_SOCKET path too long: %s", e);
1636 return -EINVAL;
1637 }
1638
1639 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1640 if (s->notify_fd < 0)
1641 return log_error_errno(errno, "Failed to create notify socket: %m");
1642
1643 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1644
1645 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1646 if (sa.un.sun_path[0] == '@')
1647 sa.un.sun_path[0] = 0;
1648
1649 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1650 if (r < 0)
1651 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1652
1653 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1654 if (r < 0)
1655 return log_error_errno(r, "Failed to watch notification socket: %m");
1656
1657 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1658 s->send_watchdog = true;
1659
1660 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec*3/4, dispatch_watchdog, s);
1661 if (r < 0)
1662 return log_error_errno(r, "Failed to add watchdog time event: %m");
1663 }
1664
1665 /* This should fire pretty soon, which we'll use to send the
1666 * READY=1 event. */
1667
1668 return 0;
1669 }
1670
1671 int server_init(Server *s) {
1672 _cleanup_fdset_free_ FDSet *fds = NULL;
1673 int n, r, fd;
1674 bool no_sockets;
1675
1676 assert(s);
1677
1678 zero(*s);
1679 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1680 s->compress = true;
1681 s->seal = true;
1682
1683 s->watchdog_usec = USEC_INFINITY;
1684
1685 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1686 s->sync_scheduled = false;
1687
1688 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1689 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1690
1691 s->forward_to_wall = true;
1692
1693 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1694
1695 s->max_level_store = LOG_DEBUG;
1696 s->max_level_syslog = LOG_DEBUG;
1697 s->max_level_kmsg = LOG_NOTICE;
1698 s->max_level_console = LOG_INFO;
1699 s->max_level_wall = LOG_EMERG;
1700
1701 journal_reset_metrics(&s->system_metrics);
1702 journal_reset_metrics(&s->runtime_metrics);
1703
1704 server_parse_config_file(s);
1705 server_parse_proc_cmdline(s);
1706
1707 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1708 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1709 s->rate_limit_interval, s->rate_limit_burst);
1710 s->rate_limit_interval = s->rate_limit_burst = 0;
1711 }
1712
1713 (void) mkdir_p("/run/systemd/journal", 0755);
1714
1715 s->user_journals = ordered_hashmap_new(NULL);
1716 if (!s->user_journals)
1717 return log_oom();
1718
1719 s->mmap = mmap_cache_new();
1720 if (!s->mmap)
1721 return log_oom();
1722
1723 r = sd_event_default(&s->event);
1724 if (r < 0)
1725 return log_error_errno(r, "Failed to create event loop: %m");
1726
1727 n = sd_listen_fds(true);
1728 if (n < 0)
1729 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1730
1731 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1732
1733 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1734
1735 if (s->native_fd >= 0) {
1736 log_error("Too many native sockets passed.");
1737 return -EINVAL;
1738 }
1739
1740 s->native_fd = fd;
1741
1742 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1743
1744 if (s->stdout_fd >= 0) {
1745 log_error("Too many stdout sockets passed.");
1746 return -EINVAL;
1747 }
1748
1749 s->stdout_fd = fd;
1750
1751 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1752 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1753
1754 if (s->syslog_fd >= 0) {
1755 log_error("Too many /dev/log sockets passed.");
1756 return -EINVAL;
1757 }
1758
1759 s->syslog_fd = fd;
1760
1761 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1762
1763 if (s->audit_fd >= 0) {
1764 log_error("Too many audit sockets passed.");
1765 return -EINVAL;
1766 }
1767
1768 s->audit_fd = fd;
1769
1770 } else {
1771
1772 if (!fds) {
1773 fds = fdset_new();
1774 if (!fds)
1775 return log_oom();
1776 }
1777
1778 r = fdset_put(fds, fd);
1779 if (r < 0)
1780 return log_oom();
1781 }
1782 }
1783
1784 /* Try to restore streams, but don't bother if this fails */
1785 (void) server_restore_streams(s, fds);
1786
1787 if (fdset_size(fds) > 0) {
1788 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1789 fds = fdset_free(fds);
1790 }
1791
1792 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1793
1794 /* always open stdout, syslog, native, and kmsg sockets */
1795
1796 /* systemd-journald.socket: /run/systemd/journal/stdout */
1797 r = server_open_stdout_socket(s);
1798 if (r < 0)
1799 return r;
1800
1801 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1802 r = server_open_syslog_socket(s);
1803 if (r < 0)
1804 return r;
1805
1806 /* systemd-journald.socket: /run/systemd/journal/socket */
1807 r = server_open_native_socket(s);
1808 if (r < 0)
1809 return r;
1810
1811 /* /dev/ksmg */
1812 r = server_open_dev_kmsg(s);
1813 if (r < 0)
1814 return r;
1815
1816 /* Unless we got *some* sockets and not audit, open audit socket */
1817 if (s->audit_fd >= 0 || no_sockets) {
1818 r = server_open_audit(s);
1819 if (r < 0)
1820 return r;
1821 }
1822
1823 r = server_open_kernel_seqnum(s);
1824 if (r < 0)
1825 return r;
1826
1827 r = server_open_hostname(s);
1828 if (r < 0)
1829 return r;
1830
1831 r = setup_signals(s);
1832 if (r < 0)
1833 return r;
1834
1835 s->udev = udev_new();
1836 if (!s->udev)
1837 return -ENOMEM;
1838
1839 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1840 if (!s->rate_limit)
1841 return -ENOMEM;
1842
1843 r = cg_get_root_path(&s->cgroup_root);
1844 if (r < 0)
1845 return r;
1846
1847 server_cache_hostname(s);
1848 server_cache_boot_id(s);
1849 server_cache_machine_id(s);
1850
1851 (void) server_connect_notify(s);
1852
1853 return system_journal_open(s, false);
1854 }
1855
1856 void server_maybe_append_tags(Server *s) {
1857 #ifdef HAVE_GCRYPT
1858 JournalFile *f;
1859 Iterator i;
1860 usec_t n;
1861
1862 n = now(CLOCK_REALTIME);
1863
1864 if (s->system_journal)
1865 journal_file_maybe_append_tag(s->system_journal, n);
1866
1867 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1868 journal_file_maybe_append_tag(f, n);
1869 #endif
1870 }
1871
1872 void server_done(Server *s) {
1873 JournalFile *f;
1874 assert(s);
1875
1876 while (s->stdout_streams)
1877 stdout_stream_free(s->stdout_streams);
1878
1879 if (s->system_journal)
1880 journal_file_close(s->system_journal);
1881
1882 if (s->runtime_journal)
1883 journal_file_close(s->runtime_journal);
1884
1885 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1886 journal_file_close(f);
1887
1888 ordered_hashmap_free(s->user_journals);
1889
1890 sd_event_source_unref(s->syslog_event_source);
1891 sd_event_source_unref(s->native_event_source);
1892 sd_event_source_unref(s->stdout_event_source);
1893 sd_event_source_unref(s->dev_kmsg_event_source);
1894 sd_event_source_unref(s->audit_event_source);
1895 sd_event_source_unref(s->sync_event_source);
1896 sd_event_source_unref(s->sigusr1_event_source);
1897 sd_event_source_unref(s->sigusr2_event_source);
1898 sd_event_source_unref(s->sigterm_event_source);
1899 sd_event_source_unref(s->sigint_event_source);
1900 sd_event_source_unref(s->sigrtmin1_event_source);
1901 sd_event_source_unref(s->hostname_event_source);
1902 sd_event_source_unref(s->notify_event_source);
1903 sd_event_source_unref(s->watchdog_event_source);
1904 sd_event_unref(s->event);
1905
1906 safe_close(s->syslog_fd);
1907 safe_close(s->native_fd);
1908 safe_close(s->stdout_fd);
1909 safe_close(s->dev_kmsg_fd);
1910 safe_close(s->audit_fd);
1911 safe_close(s->hostname_fd);
1912 safe_close(s->notify_fd);
1913
1914 if (s->rate_limit)
1915 journal_rate_limit_free(s->rate_limit);
1916
1917 if (s->kernel_seqnum)
1918 munmap(s->kernel_seqnum, sizeof(uint64_t));
1919
1920 free(s->buffer);
1921 free(s->tty_path);
1922 free(s->cgroup_root);
1923 free(s->hostname_field);
1924
1925 if (s->mmap)
1926 mmap_cache_unref(s->mmap);
1927
1928 udev_unref(s->udev);
1929 }
1930
1931 static const char* const storage_table[_STORAGE_MAX] = {
1932 [STORAGE_AUTO] = "auto",
1933 [STORAGE_VOLATILE] = "volatile",
1934 [STORAGE_PERSISTENT] = "persistent",
1935 [STORAGE_NONE] = "none"
1936 };
1937
1938 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1939 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1940
1941 static const char* const split_mode_table[_SPLIT_MAX] = {
1942 [SPLIT_LOGIN] = "login",
1943 [SPLIT_UID] = "uid",
1944 [SPLIT_NONE] = "none",
1945 };
1946
1947 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1948 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");