]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
journalctl: make --rotate synchronous, too
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "formats-util.h"
45 #include "fs-util.h"
46 #include "hashmap.h"
47 #include "hostname-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "missing.h"
61 #include "mkdir.h"
62 #include "parse-util.h"
63 #include "proc-cmdline.h"
64 #include "process-util.h"
65 #include "rm-rf.h"
66 #include "selinux-util.h"
67 #include "signal-util.h"
68 #include "socket-util.h"
69 #include "string-table.h"
70 #include "string-util.h"
71
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes entries from s->user_journals once this many
 * are open. */
72 #define USER_JOURNALS_MAX 1024
73
/* Default tunables. They are not referenced in the part of the file reviewed
 * here -- presumably they are applied when the server is initialized
 * (TODO confirm). */
74 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
75 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
76 #define DEFAULT_RATE_LIMIT_BURST 1000
77 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
78
/* How long determine_space_for() keeps answering from the cached disk space
 * values before it scans the journal directory again. */
79 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
80
/* NOTE(review): presumably the send buffer size requested for the
 * notification socket -- not used in the visible code, confirm. */
81 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
82
83 static int determine_space_for(
84 Server *s,
85 JournalMetrics *metrics,
86 const char *path,
87 const char *name,
88 bool verbose,
89 bool patch_min_use,
90 uint64_t *available,
91 uint64_t *limit) {
92
93 uint64_t sum = 0, ss_avail, avail;
94 _cleanup_closedir_ DIR *d = NULL;
95 struct dirent *de;
96 struct statvfs ss;
97 const char *p;
98 usec_t ts;
99
100 assert(s);
101 assert(metrics);
102 assert(path);
103 assert(name);
104
105 ts = now(CLOCK_MONOTONIC);
106
107 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
108
109 if (available)
110 *available = s->cached_space_available;
111 if (limit)
112 *limit = s->cached_space_limit;
113
114 return 0;
115 }
116
117 p = strjoina(path, SERVER_MACHINE_ID(s));
118 d = opendir(p);
119 if (!d)
120 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
121
122 if (fstatvfs(dirfd(d), &ss) < 0)
123 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
124
125 FOREACH_DIRENT_ALL(de, d, break) {
126 struct stat st;
127
128 if (!endswith(de->d_name, ".journal") &&
129 !endswith(de->d_name, ".journal~"))
130 continue;
131
132 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
133 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
134 continue;
135 }
136
137 if (!S_ISREG(st.st_mode))
138 continue;
139
140 sum += (uint64_t) st.st_blocks * 512UL;
141 }
142
143 /* If request, then let's bump the min_use limit to the
144 * current usage on disk. We do this when starting up and
145 * first opening the journal files. This way sudden spikes in
146 * disk usage will not cause journald to vacuum files without
147 * bounds. Note that this means that only a restart of
148 * journald will make it reset this value. */
149
150 if (patch_min_use)
151 metrics->min_use = MAX(metrics->min_use, sum);
152
153 ss_avail = ss.f_bsize * ss.f_bavail;
154 avail = LESS_BY(ss_avail, metrics->keep_free);
155
156 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
157 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
158 s->cached_space_timestamp = ts;
159
160 if (verbose) {
161 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
162 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
163
164 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
165 "%s (%s) is currently using %s.\n"
166 "Maximum allowed usage is set to %s.\n"
167 "Leaving at least %s free (of currently available %s of space).\n"
168 "Enforced usage limit is thus %s, of which %s are still available.",
169 name, path,
170 format_bytes(fb1, sizeof(fb1), sum),
171 format_bytes(fb2, sizeof(fb2), metrics->max_use),
172 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
173 format_bytes(fb4, sizeof(fb4), ss_avail),
174 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
175 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
176 }
177
178 if (available)
179 *available = s->cached_space_available;
180 if (limit)
181 *limit = s->cached_space_limit;
182
183 return 1;
184 }
185
186 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
187 JournalMetrics *metrics;
188 const char *path, *name;
189
190 assert(s);
191
192 if (s->system_journal) {
193 path = "/var/log/journal/";
194 metrics = &s->system_metrics;
195 name = "System journal";
196 } else {
197 path = "/run/log/journal/";
198 metrics = &s->runtime_metrics;
199 name = "Runtime journal";
200 }
201
202 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
203 }
204
205 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
206 int r;
207 #ifdef HAVE_ACL
208 _cleanup_(acl_freep) acl_t acl = NULL;
209 acl_entry_t entry;
210 acl_permset_t permset;
211 #endif
212
213 assert(f);
214
215 r = fchmod(f->fd, 0640);
216 if (r < 0)
217 log_warning_errno(errno, "Failed to fix access mode on %s, ignoring: %m", f->path);
218
219 #ifdef HAVE_ACL
220 if (uid <= SYSTEM_UID_MAX)
221 return;
222
223 acl = acl_get_fd(f->fd);
224 if (!acl) {
225 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
226 return;
227 }
228
229 r = acl_find_uid(acl, uid, &entry);
230 if (r <= 0) {
231
232 if (acl_create_entry(&acl, &entry) < 0 ||
233 acl_set_tag_type(entry, ACL_USER) < 0 ||
234 acl_set_qualifier(entry, &uid) < 0) {
235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
236 return;
237 }
238 }
239
240 /* We do not recalculate the mask unconditionally here,
241 * so that the fchmod() mask above stays intact. */
242 if (acl_get_permset(entry, &permset) < 0 ||
243 acl_add_perm(permset, ACL_READ) < 0) {
244 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
245 return;
246 }
247
248 r = calc_acl_mask_if_needed(&acl);
249 if (r < 0) {
250 log_warning_errno(r, "Failed to patch ACL on %s, ignoring: %m", f->path);
251 return;
252 }
253
254 if (acl_set_fd(f->fd, acl) < 0)
255 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
256
257 #endif
258 }
259
260 static JournalFile* find_journal(Server *s, uid_t uid) {
261 _cleanup_free_ char *p = NULL;
262 int r;
263 JournalFile *f;
264 sd_id128_t machine;
265
266 assert(s);
267
268 /* We split up user logs only on /var, not on /run. If the
269 * runtime file is open, we write to it exclusively, in order
270 * to guarantee proper order as soon as we flush /run to
271 * /var and close the runtime file. */
272
273 if (s->runtime_journal)
274 return s->runtime_journal;
275
276 if (uid <= SYSTEM_UID_MAX)
277 return s->system_journal;
278
279 r = sd_id128_get_machine(&machine);
280 if (r < 0)
281 return s->system_journal;
282
283 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
284 if (f)
285 return f;
286
287 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
288 SD_ID128_FORMAT_VAL(machine), uid) < 0)
289 return s->system_journal;
290
291 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
292 /* Too many open? Then let's close one */
293 f = ordered_hashmap_steal_first(s->user_journals);
294 assert(f);
295 journal_file_close(f);
296 }
297
298 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
299 if (r < 0)
300 return s->system_journal;
301
302 server_fix_perms(s, f, uid);
303
304 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
305 if (r < 0) {
306 journal_file_close(f);
307 return s->system_journal;
308 }
309
310 return f;
311 }
312
313 static int do_rotate(
314 Server *s,
315 JournalFile **f,
316 const char* name,
317 bool seal,
318 uint32_t uid) {
319
320 int r;
321 assert(s);
322
323 if (!*f)
324 return -EINVAL;
325
326 r = journal_file_rotate(f, s->compress, seal);
327 if (r < 0)
328 if (*f)
329 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
330 else
331 log_error_errno(r, "Failed to create new %s journal: %m", name);
332 else
333 server_fix_perms(s, *f, uid);
334
335 return r;
336 }
337
338 void server_rotate(Server *s) {
339 JournalFile *f;
340 void *k;
341 Iterator i;
342 int r;
343
344 log_debug("Rotating...");
345
346 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
347 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
348
349 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
350 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
351 if (r >= 0)
352 ordered_hashmap_replace(s->user_journals, k, f);
353 else if (!f)
354 /* Old file has been closed and deallocated */
355 ordered_hashmap_remove(s->user_journals, k);
356 }
357 }
358
359 void server_sync(Server *s) {
360 JournalFile *f;
361 void *k;
362 Iterator i;
363 int r;
364
365 if (s->system_journal) {
366 r = journal_file_set_offline(s->system_journal);
367 if (r < 0)
368 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
369 }
370
371 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
372 r = journal_file_set_offline(f);
373 if (r < 0)
374 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
375 }
376
377 if (s->sync_event_source) {
378 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
379 if (r < 0)
380 log_error_errno(r, "Failed to disable sync timer source: %m");
381 }
382
383 s->sync_scheduled = false;
384 }
385
386 static void do_vacuum(
387 Server *s,
388 JournalFile *f,
389 JournalMetrics *metrics,
390 const char *path,
391 const char *name,
392 bool verbose,
393 bool patch_min_use) {
394
395 const char *p;
396 uint64_t limit;
397 int r;
398
399 assert(s);
400 assert(metrics);
401 assert(path);
402 assert(name);
403
404 if (!f)
405 return;
406
407 p = strjoina(path, SERVER_MACHINE_ID(s));
408
409 limit = metrics->max_use;
410 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
411
412 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
413 if (r < 0 && r != -ENOENT)
414 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
415 }
416
417 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
418 assert(s);
419
420 log_debug("Vacuuming...");
421
422 s->oldest_file_usec = 0;
423
424 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
425 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
426
427 s->cached_space_limit = 0;
428 s->cached_space_available = 0;
429 s->cached_space_timestamp = 0;
430
431 return 0;
432 }
433
434 static void server_cache_machine_id(Server *s) {
435 sd_id128_t id;
436 int r;
437
438 assert(s);
439
440 r = sd_id128_get_machine(&id);
441 if (r < 0)
442 return;
443
444 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
445 }
446
447 static void server_cache_boot_id(Server *s) {
448 sd_id128_t id;
449 int r;
450
451 assert(s);
452
453 r = sd_id128_get_boot(&id);
454 if (r < 0)
455 return;
456
457 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
458 }
459
460 static void server_cache_hostname(Server *s) {
461 _cleanup_free_ char *t = NULL;
462 char *x;
463
464 assert(s);
465
466 t = gethostname_malloc();
467 if (!t)
468 return;
469
470 x = strappend("_HOSTNAME=", t);
471 if (!x)
472 return;
473
474 free(s->hostname_field);
475 s->hostname_field = x;
476 }
477
478 static bool shall_try_append_again(JournalFile *f, int r) {
479
480 /* -E2BIG Hit configured limit
481 -EFBIG Hit fs limit
482 -EDQUOT Quota limit hit
483 -ENOSPC Disk full
484 -EIO I/O error of some kind (mmap)
485 -EHOSTDOWN Other machine
486 -EBUSY Unclean shutdown
487 -EPROTONOSUPPORT Unsupported feature
488 -EBADMSG Corrupted
489 -ENODATA Truncated
490 -ESHUTDOWN Already archived
491 -EIDRM Journal file has been deleted */
492
493 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
494 log_debug("%s: Allocation limit reached, rotating.", f->path);
495 else if (r == -EHOSTDOWN)
496 log_info("%s: Journal file from other machine, rotating.", f->path);
497 else if (r == -EBUSY)
498 log_info("%s: Unclean shutdown, rotating.", f->path);
499 else if (r == -EPROTONOSUPPORT)
500 log_info("%s: Unsupported feature, rotating.", f->path);
501 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
502 log_warning("%s: Journal file corrupted, rotating.", f->path);
503 else if (r == -EIO)
504 log_warning("%s: IO error, rotating.", f->path);
505 else if (r == -EIDRM)
506 log_warning("%s: Journal file has been deleted, rotating.", f->path);
507 else
508 return false;
509
510 return true;
511 }
512
513 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
514 JournalFile *f;
515 bool vacuumed = false;
516 int r;
517
518 assert(s);
519 assert(iovec);
520 assert(n > 0);
521
522 f = find_journal(s, uid);
523 if (!f)
524 return;
525
526 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
527 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
528 server_rotate(s);
529 server_vacuum(s, false, false);
530 vacuumed = true;
531
532 f = find_journal(s, uid);
533 if (!f)
534 return;
535 }
536
537 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
538 if (r >= 0) {
539 server_schedule_sync(s, priority);
540 return;
541 }
542
543 if (vacuumed || !shall_try_append_again(f, r)) {
544 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
545 return;
546 }
547
548 server_rotate(s);
549 server_vacuum(s, false, false);
550
551 f = find_journal(s, uid);
552 if (!f)
553 return;
554
555 log_debug("Retrying write.");
556 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
557 if (r < 0)
558 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
559 else
560 server_schedule_sync(s, priority);
561 }
562
563 static void dispatch_message_real(
564 Server *s,
565 struct iovec *iovec, unsigned n, unsigned m,
566 const struct ucred *ucred,
567 const struct timeval *tv,
568 const char *label, size_t label_len,
569 const char *unit_id,
570 int priority,
571 pid_t object_pid) {
572
573 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
574 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
575 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
576 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
577 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
578 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
579 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
580 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
581 uid_t object_uid;
582 gid_t object_gid;
583 char *x;
584 int r;
585 char *t, *c;
586 uid_t realuid = 0, owner = 0, journal_uid;
587 bool owner_valid = false;
588 #ifdef HAVE_AUDIT
589 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
590 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
591 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
592 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
593
594 uint32_t audit;
595 uid_t loginuid;
596 #endif
597
598 assert(s);
599 assert(iovec);
600 assert(n > 0);
601 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
602
603 if (ucred) {
604 realuid = ucred->uid;
605
606 sprintf(pid, "_PID="PID_FMT, ucred->pid);
607 IOVEC_SET_STRING(iovec[n++], pid);
608
609 sprintf(uid, "_UID="UID_FMT, ucred->uid);
610 IOVEC_SET_STRING(iovec[n++], uid);
611
612 sprintf(gid, "_GID="GID_FMT, ucred->gid);
613 IOVEC_SET_STRING(iovec[n++], gid);
614
615 r = get_process_comm(ucred->pid, &t);
616 if (r >= 0) {
617 x = strjoina("_COMM=", t);
618 free(t);
619 IOVEC_SET_STRING(iovec[n++], x);
620 }
621
622 r = get_process_exe(ucred->pid, &t);
623 if (r >= 0) {
624 x = strjoina("_EXE=", t);
625 free(t);
626 IOVEC_SET_STRING(iovec[n++], x);
627 }
628
629 r = get_process_cmdline(ucred->pid, 0, false, &t);
630 if (r >= 0) {
631 x = strjoina("_CMDLINE=", t);
632 free(t);
633 IOVEC_SET_STRING(iovec[n++], x);
634 }
635
636 r = get_process_capeff(ucred->pid, &t);
637 if (r >= 0) {
638 x = strjoina("_CAP_EFFECTIVE=", t);
639 free(t);
640 IOVEC_SET_STRING(iovec[n++], x);
641 }
642
643 #ifdef HAVE_AUDIT
644 r = audit_session_from_pid(ucred->pid, &audit);
645 if (r >= 0) {
646 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
647 IOVEC_SET_STRING(iovec[n++], audit_session);
648 }
649
650 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
651 if (r >= 0) {
652 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
653 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
654 }
655 #endif
656
657 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
658 if (r >= 0) {
659 char *session = NULL;
660
661 x = strjoina("_SYSTEMD_CGROUP=", c);
662 IOVEC_SET_STRING(iovec[n++], x);
663
664 r = cg_path_get_session(c, &t);
665 if (r >= 0) {
666 session = strjoina("_SYSTEMD_SESSION=", t);
667 free(t);
668 IOVEC_SET_STRING(iovec[n++], session);
669 }
670
671 if (cg_path_get_owner_uid(c, &owner) >= 0) {
672 owner_valid = true;
673
674 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
675 IOVEC_SET_STRING(iovec[n++], owner_uid);
676 }
677
678 if (cg_path_get_unit(c, &t) >= 0) {
679 x = strjoina("_SYSTEMD_UNIT=", t);
680 free(t);
681 IOVEC_SET_STRING(iovec[n++], x);
682 } else if (unit_id && !session) {
683 x = strjoina("_SYSTEMD_UNIT=", unit_id);
684 IOVEC_SET_STRING(iovec[n++], x);
685 }
686
687 if (cg_path_get_user_unit(c, &t) >= 0) {
688 x = strjoina("_SYSTEMD_USER_UNIT=", t);
689 free(t);
690 IOVEC_SET_STRING(iovec[n++], x);
691 } else if (unit_id && session) {
692 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
693 IOVEC_SET_STRING(iovec[n++], x);
694 }
695
696 if (cg_path_get_slice(c, &t) >= 0) {
697 x = strjoina("_SYSTEMD_SLICE=", t);
698 free(t);
699 IOVEC_SET_STRING(iovec[n++], x);
700 }
701
702 free(c);
703 } else if (unit_id) {
704 x = strjoina("_SYSTEMD_UNIT=", unit_id);
705 IOVEC_SET_STRING(iovec[n++], x);
706 }
707
708 #ifdef HAVE_SELINUX
709 if (mac_selinux_use()) {
710 if (label) {
711 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
712
713 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
714 IOVEC_SET_STRING(iovec[n++], x);
715 } else {
716 security_context_t con;
717
718 if (getpidcon(ucred->pid, &con) >= 0) {
719 x = strjoina("_SELINUX_CONTEXT=", con);
720
721 freecon(con);
722 IOVEC_SET_STRING(iovec[n++], x);
723 }
724 }
725 }
726 #endif
727 }
728 assert(n <= m);
729
730 if (object_pid) {
731 r = get_process_uid(object_pid, &object_uid);
732 if (r >= 0) {
733 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
734 IOVEC_SET_STRING(iovec[n++], o_uid);
735 }
736
737 r = get_process_gid(object_pid, &object_gid);
738 if (r >= 0) {
739 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
740 IOVEC_SET_STRING(iovec[n++], o_gid);
741 }
742
743 r = get_process_comm(object_pid, &t);
744 if (r >= 0) {
745 x = strjoina("OBJECT_COMM=", t);
746 free(t);
747 IOVEC_SET_STRING(iovec[n++], x);
748 }
749
750 r = get_process_exe(object_pid, &t);
751 if (r >= 0) {
752 x = strjoina("OBJECT_EXE=", t);
753 free(t);
754 IOVEC_SET_STRING(iovec[n++], x);
755 }
756
757 r = get_process_cmdline(object_pid, 0, false, &t);
758 if (r >= 0) {
759 x = strjoina("OBJECT_CMDLINE=", t);
760 free(t);
761 IOVEC_SET_STRING(iovec[n++], x);
762 }
763
764 #ifdef HAVE_AUDIT
765 r = audit_session_from_pid(object_pid, &audit);
766 if (r >= 0) {
767 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
768 IOVEC_SET_STRING(iovec[n++], o_audit_session);
769 }
770
771 r = audit_loginuid_from_pid(object_pid, &loginuid);
772 if (r >= 0) {
773 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
774 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
775 }
776 #endif
777
778 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
779 if (r >= 0) {
780 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
781 IOVEC_SET_STRING(iovec[n++], x);
782
783 r = cg_path_get_session(c, &t);
784 if (r >= 0) {
785 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
786 free(t);
787 IOVEC_SET_STRING(iovec[n++], x);
788 }
789
790 if (cg_path_get_owner_uid(c, &owner) >= 0) {
791 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
792 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
793 }
794
795 if (cg_path_get_unit(c, &t) >= 0) {
796 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
797 free(t);
798 IOVEC_SET_STRING(iovec[n++], x);
799 }
800
801 if (cg_path_get_user_unit(c, &t) >= 0) {
802 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
803 free(t);
804 IOVEC_SET_STRING(iovec[n++], x);
805 }
806
807 free(c);
808 }
809 }
810 assert(n <= m);
811
812 if (tv) {
813 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
814 IOVEC_SET_STRING(iovec[n++], source_time);
815 }
816
817 /* Note that strictly speaking storing the boot id here is
818 * redundant since the entry includes this in-line
819 * anyway. However, we need this indexed, too. */
820 if (!isempty(s->boot_id_field))
821 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
822
823 if (!isempty(s->machine_id_field))
824 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
825
826 if (!isempty(s->hostname_field))
827 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
828
829 assert(n <= m);
830
831 if (s->split_mode == SPLIT_UID && realuid > 0)
832 /* Split up strictly by any UID */
833 journal_uid = realuid;
834 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
835 /* Split up by login UIDs. We do this only if the
836 * realuid is not root, in order not to accidentally
837 * leak privileged information to the user that is
838 * logged by a privileged process that is part of an
839 * unprivileged session. */
840 journal_uid = owner;
841 else
842 journal_uid = 0;
843
844 write_to_journal(s, journal_uid, iovec, n, priority);
845 }
846
847 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
848 char mid[11 + 32 + 1];
849 char buffer[16 + LINE_MAX + 1];
850 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
851 int n = 0;
852 va_list ap;
853 struct ucred ucred = {};
854
855 assert(s);
856 assert(format);
857
858 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
859 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
860
861 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
862 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
863
864 memcpy(buffer, "MESSAGE=", 8);
865 va_start(ap, format);
866 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
867 va_end(ap);
868 IOVEC_SET_STRING(iovec[n++], buffer);
869
870 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
871 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
872 IOVEC_SET_STRING(iovec[n++], mid);
873 }
874
875 ucred.pid = getpid();
876 ucred.uid = getuid();
877 ucred.gid = getgid();
878
879 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
880 }
881
882 void server_dispatch_message(
883 Server *s,
884 struct iovec *iovec, unsigned n, unsigned m,
885 const struct ucred *ucred,
886 const struct timeval *tv,
887 const char *label, size_t label_len,
888 const char *unit_id,
889 int priority,
890 pid_t object_pid) {
891
892 int rl, r;
893 _cleanup_free_ char *path = NULL;
894 uint64_t available = 0;
895 char *c;
896
897 assert(s);
898 assert(iovec || n == 0);
899
900 if (n == 0)
901 return;
902
903 if (LOG_PRI(priority) > s->max_level_store)
904 return;
905
906 /* Stop early in case the information will not be stored
907 * in a journal. */
908 if (s->storage == STORAGE_NONE)
909 return;
910
911 if (!ucred)
912 goto finish;
913
914 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
915 if (r < 0)
916 goto finish;
917
918 /* example: /user/lennart/3/foobar
919 * /system/dbus.service/foobar
920 *
921 * So let's cut of everything past the third /, since that is
922 * where user directories start */
923
924 c = strchr(path, '/');
925 if (c) {
926 c = strchr(c+1, '/');
927 if (c) {
928 c = strchr(c+1, '/');
929 if (c)
930 *c = 0;
931 }
932 }
933
934 (void) determine_space(s, false, false, &available, NULL);
935 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
936 if (rl == 0)
937 return;
938
939 /* Write a suppression message if we suppressed something */
940 if (rl > 1)
941 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
942 "Suppressed %u messages from %s", rl - 1, path);
943
944 finish:
945 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
946 }
947
948
949 static int system_journal_open(Server *s, bool flush_requested) {
950 const char *fn;
951 int r = 0;
952
953 if (!s->system_journal &&
954 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
955 (flush_requested
956 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
957
958 /* If in auto mode: first try to create the machine
959 * path, but not the prefix.
960 *
961 * If in persistent mode: create /var/log/journal and
962 * the machine path */
963
964 if (s->storage == STORAGE_PERSISTENT)
965 (void) mkdir_p("/var/log/journal/", 0755);
966
967 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
968 (void) mkdir(fn, 0755);
969
970 fn = strjoina(fn, "/system.journal");
971 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
972 if (r >= 0) {
973 server_fix_perms(s, s->system_journal, 0);
974 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
975 } else if (r < 0) {
976 if (r != -ENOENT && r != -EROFS)
977 log_warning_errno(r, "Failed to open system journal: %m");
978
979 r = 0;
980 }
981 }
982
983 if (!s->runtime_journal &&
984 (s->storage != STORAGE_NONE)) {
985
986 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
987
988 if (s->system_journal) {
989
990 /* Try to open the runtime journal, but only
991 * if it already exists, so that we can flush
992 * it into the system journal */
993
994 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
995 if (r < 0) {
996 if (r != -ENOENT)
997 log_warning_errno(r, "Failed to open runtime journal: %m");
998
999 r = 0;
1000 }
1001
1002 } else {
1003
1004 /* OK, we really need the runtime journal, so create
1005 * it if necessary. */
1006
1007 (void) mkdir("/run/log", 0755);
1008 (void) mkdir("/run/log/journal", 0755);
1009 (void) mkdir_parents(fn, 0750);
1010
1011 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1012 if (r < 0)
1013 return log_error_errno(r, "Failed to open runtime journal: %m");
1014 }
1015
1016 if (s->runtime_journal) {
1017 server_fix_perms(s, s->runtime_journal, 0);
1018 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1019 }
1020 }
1021
1022 return r;
1023 }
1024
1025 int server_flush_to_var(Server *s) {
1026 sd_id128_t machine;
1027 sd_journal *j = NULL;
1028 char ts[FORMAT_TIMESPAN_MAX];
1029 usec_t start;
1030 unsigned n = 0;
1031 int r;
1032
1033 assert(s);
1034
1035 if (s->storage != STORAGE_AUTO &&
1036 s->storage != STORAGE_PERSISTENT)
1037 return 0;
1038
1039 if (!s->runtime_journal)
1040 return 0;
1041
1042 (void) system_journal_open(s, true);
1043
1044 if (!s->system_journal)
1045 return 0;
1046
1047 log_debug("Flushing to /var...");
1048
1049 start = now(CLOCK_MONOTONIC);
1050
1051 r = sd_id128_get_machine(&machine);
1052 if (r < 0)
1053 return r;
1054
1055 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1056 if (r < 0)
1057 return log_error_errno(r, "Failed to read runtime journal: %m");
1058
1059 sd_journal_set_data_threshold(j, 0);
1060
1061 SD_JOURNAL_FOREACH(j) {
1062 Object *o = NULL;
1063 JournalFile *f;
1064
1065 f = j->current_file;
1066 assert(f && f->current_offset > 0);
1067
1068 n++;
1069
1070 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1071 if (r < 0) {
1072 log_error_errno(r, "Can't read entry: %m");
1073 goto finish;
1074 }
1075
1076 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1077 if (r >= 0)
1078 continue;
1079
1080 if (!shall_try_append_again(s->system_journal, r)) {
1081 log_error_errno(r, "Can't write entry: %m");
1082 goto finish;
1083 }
1084
1085 server_rotate(s);
1086 server_vacuum(s, false, false);
1087
1088 if (!s->system_journal) {
1089 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1090 r = -EIO;
1091 goto finish;
1092 }
1093
1094 log_debug("Retrying write.");
1095 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1096 if (r < 0) {
1097 log_error_errno(r, "Can't write entry: %m");
1098 goto finish;
1099 }
1100 }
1101
1102 r = 0;
1103
1104 finish:
1105 journal_file_post_change(s->system_journal);
1106
1107 s->runtime_journal = journal_file_close(s->runtime_journal);
1108
1109 if (r >= 0)
1110 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1111
1112 sd_journal_close(j);
1113
1114 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1115
1116 return r;
1117 }
1118
1119 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1120 Server *s = userdata;
1121 struct ucred *ucred = NULL;
1122 struct timeval *tv = NULL;
1123 struct cmsghdr *cmsg;
1124 char *label = NULL;
1125 size_t label_len = 0, m;
1126 struct iovec iovec;
1127 ssize_t n;
1128 int *fds = NULL, v = 0;
1129 unsigned n_fds = 0;
1130
1131 union {
1132 struct cmsghdr cmsghdr;
1133
1134 /* We use NAME_MAX space for the SELinux label
1135 * here. The kernel currently enforces no
1136 * limit, but according to suggestions from
1137 * the SELinux people this will change and it
1138 * will probably be identical to NAME_MAX. For
1139 * now we use that, but this should be updated
1140 * one day when the final limit is known. */
1141 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1142 CMSG_SPACE(sizeof(struct timeval)) +
1143 CMSG_SPACE(sizeof(int)) + /* fd */
1144 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1145 } control = {};
1146
1147 union sockaddr_union sa = {};
1148
1149 struct msghdr msghdr = {
1150 .msg_iov = &iovec,
1151 .msg_iovlen = 1,
1152 .msg_control = &control,
1153 .msg_controllen = sizeof(control),
1154 .msg_name = &sa,
1155 .msg_namelen = sizeof(sa),
1156 };
1157
1158 assert(s);
1159 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1160
1161 if (revents != EPOLLIN) {
1162 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1163 return -EIO;
1164 }
1165
1166 /* Try to get the right size, if we can. (Not all
1167 * sockets support SIOCINQ, hence we just try, but
1168 * don't rely on it. */
1169 (void) ioctl(fd, SIOCINQ, &v);
1170
1171 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1172 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1173 (size_t) LINE_MAX,
1174 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1175
1176 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1177 return log_oom();
1178
1179 iovec.iov_base = s->buffer;
1180 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1181
1182 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1183 if (n < 0) {
1184 if (errno == EINTR || errno == EAGAIN)
1185 return 0;
1186
1187 return log_error_errno(errno, "recvmsg() failed: %m");
1188 }
1189
1190 CMSG_FOREACH(cmsg, &msghdr) {
1191
1192 if (cmsg->cmsg_level == SOL_SOCKET &&
1193 cmsg->cmsg_type == SCM_CREDENTIALS &&
1194 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1195 ucred = (struct ucred*) CMSG_DATA(cmsg);
1196 else if (cmsg->cmsg_level == SOL_SOCKET &&
1197 cmsg->cmsg_type == SCM_SECURITY) {
1198 label = (char*) CMSG_DATA(cmsg);
1199 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1200 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1201 cmsg->cmsg_type == SO_TIMESTAMP &&
1202 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1203 tv = (struct timeval*) CMSG_DATA(cmsg);
1204 else if (cmsg->cmsg_level == SOL_SOCKET &&
1205 cmsg->cmsg_type == SCM_RIGHTS) {
1206 fds = (int*) CMSG_DATA(cmsg);
1207 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1208 }
1209 }
1210
1211 /* And a trailing NUL, just in case */
1212 s->buffer[n] = 0;
1213
1214 if (fd == s->syslog_fd) {
1215 if (n > 0 && n_fds == 0)
1216 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1217 else if (n_fds > 0)
1218 log_warning("Got file descriptors via syslog socket. Ignoring.");
1219
1220 } else if (fd == s->native_fd) {
1221 if (n > 0 && n_fds == 0)
1222 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1223 else if (n == 0 && n_fds == 1)
1224 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1225 else if (n_fds > 0)
1226 log_warning("Got too many file descriptors via native socket. Ignoring.");
1227
1228 } else {
1229 assert(fd == s->audit_fd);
1230
1231 if (n > 0 && n_fds == 0)
1232 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1233 else if (n_fds > 0)
1234 log_warning("Got file descriptors via audit socket. Ignoring.");
1235 }
1236
1237 close_many(fds, n_fds);
1238 return 0;
1239 }
1240
1241 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1242 Server *s = userdata;
1243
1244 assert(s);
1245
1246 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1247
1248 server_flush_to_var(s);
1249 server_sync(s);
1250 server_vacuum(s, false, false);
1251
1252 (void) touch("/run/systemd/journal/flushed");
1253
1254 return 0;
1255 }
1256
1257 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1258 Server *s = userdata;
1259
1260 assert(s);
1261
1262 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1263 server_rotate(s);
1264 server_vacuum(s, true, true);
1265
1266 /* Let clients know when the most recent rotation happened. */
1267 (void) touch("/run/systemd/journal/rotated");
1268
1269 return 0;
1270 }
1271
1272 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1273 Server *s = userdata;
1274
1275 assert(s);
1276
1277 log_received_signal(LOG_INFO, si);
1278
1279 sd_event_exit(s->event, 0);
1280 return 0;
1281 }
1282
1283 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1284 Server *s = userdata;
1285
1286 assert(s);
1287
1288 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1289
1290 server_sync(s);
1291
1292 /* Let clients know when the most recent sync happened. */
1293 (void) touch("/run/systemd/journal/synced");
1294
1295 return 0;
1296 }
1297
1298 static int setup_signals(Server *s) {
1299 int r;
1300
1301 assert(s);
1302
1303 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1304
1305 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1306 if (r < 0)
1307 return r;
1308
1309 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1310 if (r < 0)
1311 return r;
1312
1313 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1314 if (r < 0)
1315 return r;
1316
1317 /* Let's process SIGTERM late, so that we flush all queued
1318 * messages to disk before we exit */
1319 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1320 if (r < 0)
1321 return r;
1322
1323 /* When journald is invoked on the terminal (when debugging),
1324 * it's useful if C-c is handled equivalent to SIGTERM. */
1325 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1326 if (r < 0)
1327 return r;
1328
1329 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1330 if (r < 0)
1331 return r;
1332
1333 /* SIGRTMIN+1 causes an immediate sync. We process this very
1334 * late, so that everything else queued at this point is
1335 * really written to disk. Clients can watch
1336 * /run/systemd/journal/synced with inotify until its mtime
1337 * changes to see when a sync happened. */
1338 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1339 if (r < 0)
1340 return r;
1341
1342 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1343 if (r < 0)
1344 return r;
1345
1346 return 0;
1347 }
1348
1349 static int server_parse_proc_cmdline(Server *s) {
1350 _cleanup_free_ char *line = NULL;
1351 const char *p;
1352 int r;
1353
1354 r = proc_cmdline(&line);
1355 if (r < 0) {
1356 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1357 return 0;
1358 }
1359
1360 p = line;
1361 for(;;) {
1362 _cleanup_free_ char *word;
1363
1364 r = extract_first_word(&p, &word, NULL, 0);
1365 if (r < 0)
1366 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1367
1368 if (r == 0)
1369 break;
1370
1371 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1372 r = parse_boolean(word + 35);
1373 if (r < 0)
1374 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1375 else
1376 s->forward_to_syslog = r;
1377 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1378 r = parse_boolean(word + 33);
1379 if (r < 0)
1380 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1381 else
1382 s->forward_to_kmsg = r;
1383 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1384 r = parse_boolean(word + 36);
1385 if (r < 0)
1386 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1387 else
1388 s->forward_to_console = r;
1389 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1390 r = parse_boolean(word + 33);
1391 if (r < 0)
1392 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1393 else
1394 s->forward_to_wall = r;
1395 } else if (startswith(word, "systemd.journald"))
1396 log_warning("Invalid systemd.journald parameter. Ignoring.");
1397 }
1398
1399 /* do not warn about state here, since probably systemd already did */
1400 return 0;
1401 }
1402
1403 static int server_parse_config_file(Server *s) {
1404 assert(s);
1405
1406 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1407 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1408 "Journal\0",
1409 config_item_perf_lookup, journald_gperf_lookup,
1410 false, s);
1411 }
1412
1413 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1414 Server *s = userdata;
1415
1416 assert(s);
1417
1418 server_sync(s);
1419 return 0;
1420 }
1421
1422 int server_schedule_sync(Server *s, int priority) {
1423 int r;
1424
1425 assert(s);
1426
1427 if (priority <= LOG_CRIT) {
1428 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1429 server_sync(s);
1430 return 0;
1431 }
1432
1433 if (s->sync_scheduled)
1434 return 0;
1435
1436 if (s->sync_interval_usec > 0) {
1437 usec_t when;
1438
1439 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1440 if (r < 0)
1441 return r;
1442
1443 when += s->sync_interval_usec;
1444
1445 if (!s->sync_event_source) {
1446 r = sd_event_add_time(
1447 s->event,
1448 &s->sync_event_source,
1449 CLOCK_MONOTONIC,
1450 when, 0,
1451 server_dispatch_sync, s);
1452 if (r < 0)
1453 return r;
1454
1455 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1456 } else {
1457 r = sd_event_source_set_time(s->sync_event_source, when);
1458 if (r < 0)
1459 return r;
1460
1461 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1462 }
1463 if (r < 0)
1464 return r;
1465
1466 s->sync_scheduled = true;
1467 }
1468
1469 return 0;
1470 }
1471
1472 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1473 Server *s = userdata;
1474
1475 assert(s);
1476
1477 server_cache_hostname(s);
1478 return 0;
1479 }
1480
1481 static int server_open_hostname(Server *s) {
1482 int r;
1483
1484 assert(s);
1485
1486 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1487 if (s->hostname_fd < 0)
1488 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1489
1490 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1491 if (r < 0) {
1492 /* kernels prior to 3.2 don't support polling this file. Ignore
1493 * the failure. */
1494 if (r == -EPERM) {
1495 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1496 s->hostname_fd = safe_close(s->hostname_fd);
1497 return 0;
1498 }
1499
1500 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1501 }
1502
1503 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1504 if (r < 0)
1505 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1506
1507 return 0;
1508 }
1509
1510 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1511 Server *s = userdata;
1512 int r;
1513
1514 assert(s);
1515 assert(s->notify_event_source == es);
1516 assert(s->notify_fd == fd);
1517
1518 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1519 * message on it. Either it's the wtachdog event, the initial
1520 * READY=1 event or an stdout stream event. If there's nothing
1521 * to write anymore, turn our event source off. The next time
1522 * there's something to send it will be turned on again. */
1523
1524 if (!s->sent_notify_ready) {
1525 static const char p[] =
1526 "READY=1\n"
1527 "STATUS=Processing requests...";
1528 ssize_t l;
1529
1530 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1531 if (l < 0) {
1532 if (errno == EAGAIN)
1533 return 0;
1534
1535 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1536 }
1537
1538 s->sent_notify_ready = true;
1539 log_debug("Sent READY=1 notification.");
1540
1541 } else if (s->send_watchdog) {
1542
1543 static const char p[] =
1544 "WATCHDOG=1";
1545
1546 ssize_t l;
1547
1548 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1549 if (l < 0) {
1550 if (errno == EAGAIN)
1551 return 0;
1552
1553 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1554 }
1555
1556 s->send_watchdog = false;
1557 log_debug("Sent WATCHDOG=1 notification.");
1558
1559 } else if (s->stdout_streams_notify_queue)
1560 /* Dispatch one stream notification event */
1561 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1562
1563 /* Leave us enabled if there's still more to to do. */
1564 if (s->send_watchdog || s->stdout_streams_notify_queue)
1565 return 0;
1566
1567 /* There was nothing to do anymore, let's turn ourselves off. */
1568 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1569 if (r < 0)
1570 return log_error_errno(r, "Failed to turn off notify event source: %m");
1571
1572 return 0;
1573 }
1574
1575 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1576 Server *s = userdata;
1577 int r;
1578
1579 assert(s);
1580
1581 s->send_watchdog = true;
1582
1583 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1584 if (r < 0)
1585 log_warning_errno(r, "Failed to turn on notify event source: %m");
1586
1587 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1588 if (r < 0)
1589 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1590
1591 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1592 if (r < 0)
1593 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1594
1595 return 0;
1596 }
1597
1598 static int server_connect_notify(Server *s) {
1599 union sockaddr_union sa = {
1600 .un.sun_family = AF_UNIX,
1601 };
1602 const char *e;
1603 int r;
1604
1605 assert(s);
1606 assert(s->notify_fd < 0);
1607 assert(!s->notify_event_source);
1608
1609 /*
1610 So here's the problem: we'd like to send notification
1611 messages to PID 1, but we cannot do that via sd_notify(),
1612 since that's synchronous, and we might end up blocking on
1613 it. Specifically: given that PID 1 might block on
1614 dbus-daemon during IPC, and dbus-daemon is logging to us,
1615 and might hence block on us, we might end up in a deadlock
1616 if we block on sending PID 1 notification messages -- by
1617 generating a full blocking circle. To avoid this, let's
1618 create a non-blocking socket, and connect it to the
1619 notification socket, and then wait for POLLOUT before we
1620 send anything. This should efficiently avoid any deadlocks,
1621 as we'll never block on PID 1, hence PID 1 can safely block
1622 on dbus-daemon which can safely block on us again.
1623
1624 Don't think that this issue is real? It is, see:
1625 https://github.com/systemd/systemd/issues/1505
1626 */
1627
1628 e = getenv("NOTIFY_SOCKET");
1629 if (!e)
1630 return 0;
1631
1632 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1633 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1634 return -EINVAL;
1635 }
1636
1637 if (strlen(e) > sizeof(sa.un.sun_path)) {
1638 log_error("NOTIFY_SOCKET path too long: %s", e);
1639 return -EINVAL;
1640 }
1641
1642 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1643 if (s->notify_fd < 0)
1644 return log_error_errno(errno, "Failed to create notify socket: %m");
1645
1646 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1647
1648 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1649 if (sa.un.sun_path[0] == '@')
1650 sa.un.sun_path[0] = 0;
1651
1652 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1653 if (r < 0)
1654 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1655
1656 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1657 if (r < 0)
1658 return log_error_errno(r, "Failed to watch notification socket: %m");
1659
1660 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1661 s->send_watchdog = true;
1662
1663 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec*3/4, dispatch_watchdog, s);
1664 if (r < 0)
1665 return log_error_errno(r, "Failed to add watchdog time event: %m");
1666 }
1667
1668 /* This should fire pretty soon, which we'll use to send the
1669 * READY=1 event. */
1670
1671 return 0;
1672 }
1673
1674 int server_init(Server *s) {
1675 _cleanup_fdset_free_ FDSet *fds = NULL;
1676 int n, r, fd;
1677 bool no_sockets;
1678
1679 assert(s);
1680
1681 zero(*s);
1682 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1683 s->compress = true;
1684 s->seal = true;
1685
1686 s->watchdog_usec = USEC_INFINITY;
1687
1688 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1689 s->sync_scheduled = false;
1690
1691 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1692 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1693
1694 s->forward_to_wall = true;
1695
1696 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1697
1698 s->max_level_store = LOG_DEBUG;
1699 s->max_level_syslog = LOG_DEBUG;
1700 s->max_level_kmsg = LOG_NOTICE;
1701 s->max_level_console = LOG_INFO;
1702 s->max_level_wall = LOG_EMERG;
1703
1704 journal_reset_metrics(&s->system_metrics);
1705 journal_reset_metrics(&s->runtime_metrics);
1706
1707 server_parse_config_file(s);
1708 server_parse_proc_cmdline(s);
1709
1710 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1711 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1712 s->rate_limit_interval, s->rate_limit_burst);
1713 s->rate_limit_interval = s->rate_limit_burst = 0;
1714 }
1715
1716 (void) mkdir_p("/run/systemd/journal", 0755);
1717
1718 s->user_journals = ordered_hashmap_new(NULL);
1719 if (!s->user_journals)
1720 return log_oom();
1721
1722 s->mmap = mmap_cache_new();
1723 if (!s->mmap)
1724 return log_oom();
1725
1726 r = sd_event_default(&s->event);
1727 if (r < 0)
1728 return log_error_errno(r, "Failed to create event loop: %m");
1729
1730 n = sd_listen_fds(true);
1731 if (n < 0)
1732 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1733
1734 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1735
1736 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1737
1738 if (s->native_fd >= 0) {
1739 log_error("Too many native sockets passed.");
1740 return -EINVAL;
1741 }
1742
1743 s->native_fd = fd;
1744
1745 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1746
1747 if (s->stdout_fd >= 0) {
1748 log_error("Too many stdout sockets passed.");
1749 return -EINVAL;
1750 }
1751
1752 s->stdout_fd = fd;
1753
1754 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1755 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1756
1757 if (s->syslog_fd >= 0) {
1758 log_error("Too many /dev/log sockets passed.");
1759 return -EINVAL;
1760 }
1761
1762 s->syslog_fd = fd;
1763
1764 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1765
1766 if (s->audit_fd >= 0) {
1767 log_error("Too many audit sockets passed.");
1768 return -EINVAL;
1769 }
1770
1771 s->audit_fd = fd;
1772
1773 } else {
1774
1775 if (!fds) {
1776 fds = fdset_new();
1777 if (!fds)
1778 return log_oom();
1779 }
1780
1781 r = fdset_put(fds, fd);
1782 if (r < 0)
1783 return log_oom();
1784 }
1785 }
1786
1787 /* Try to restore streams, but don't bother if this fails */
1788 (void) server_restore_streams(s, fds);
1789
1790 if (fdset_size(fds) > 0) {
1791 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1792 fds = fdset_free(fds);
1793 }
1794
1795 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1796
1797 /* always open stdout, syslog, native, and kmsg sockets */
1798
1799 /* systemd-journald.socket: /run/systemd/journal/stdout */
1800 r = server_open_stdout_socket(s);
1801 if (r < 0)
1802 return r;
1803
1804 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1805 r = server_open_syslog_socket(s);
1806 if (r < 0)
1807 return r;
1808
1809 /* systemd-journald.socket: /run/systemd/journal/socket */
1810 r = server_open_native_socket(s);
1811 if (r < 0)
1812 return r;
1813
1814 /* /dev/ksmg */
1815 r = server_open_dev_kmsg(s);
1816 if (r < 0)
1817 return r;
1818
1819 /* Unless we got *some* sockets and not audit, open audit socket */
1820 if (s->audit_fd >= 0 || no_sockets) {
1821 r = server_open_audit(s);
1822 if (r < 0)
1823 return r;
1824 }
1825
1826 r = server_open_kernel_seqnum(s);
1827 if (r < 0)
1828 return r;
1829
1830 r = server_open_hostname(s);
1831 if (r < 0)
1832 return r;
1833
1834 r = setup_signals(s);
1835 if (r < 0)
1836 return r;
1837
1838 s->udev = udev_new();
1839 if (!s->udev)
1840 return -ENOMEM;
1841
1842 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1843 if (!s->rate_limit)
1844 return -ENOMEM;
1845
1846 r = cg_get_root_path(&s->cgroup_root);
1847 if (r < 0)
1848 return r;
1849
1850 server_cache_hostname(s);
1851 server_cache_boot_id(s);
1852 server_cache_machine_id(s);
1853
1854 (void) server_connect_notify(s);
1855
1856 return system_journal_open(s, false);
1857 }
1858
1859 void server_maybe_append_tags(Server *s) {
1860 #ifdef HAVE_GCRYPT
1861 JournalFile *f;
1862 Iterator i;
1863 usec_t n;
1864
1865 n = now(CLOCK_REALTIME);
1866
1867 if (s->system_journal)
1868 journal_file_maybe_append_tag(s->system_journal, n);
1869
1870 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1871 journal_file_maybe_append_tag(f, n);
1872 #endif
1873 }
1874
1875 void server_done(Server *s) {
1876 JournalFile *f;
1877 assert(s);
1878
1879 while (s->stdout_streams)
1880 stdout_stream_free(s->stdout_streams);
1881
1882 if (s->system_journal)
1883 journal_file_close(s->system_journal);
1884
1885 if (s->runtime_journal)
1886 journal_file_close(s->runtime_journal);
1887
1888 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1889 journal_file_close(f);
1890
1891 ordered_hashmap_free(s->user_journals);
1892
1893 sd_event_source_unref(s->syslog_event_source);
1894 sd_event_source_unref(s->native_event_source);
1895 sd_event_source_unref(s->stdout_event_source);
1896 sd_event_source_unref(s->dev_kmsg_event_source);
1897 sd_event_source_unref(s->audit_event_source);
1898 sd_event_source_unref(s->sync_event_source);
1899 sd_event_source_unref(s->sigusr1_event_source);
1900 sd_event_source_unref(s->sigusr2_event_source);
1901 sd_event_source_unref(s->sigterm_event_source);
1902 sd_event_source_unref(s->sigint_event_source);
1903 sd_event_source_unref(s->sigrtmin1_event_source);
1904 sd_event_source_unref(s->hostname_event_source);
1905 sd_event_source_unref(s->notify_event_source);
1906 sd_event_source_unref(s->watchdog_event_source);
1907 sd_event_unref(s->event);
1908
1909 safe_close(s->syslog_fd);
1910 safe_close(s->native_fd);
1911 safe_close(s->stdout_fd);
1912 safe_close(s->dev_kmsg_fd);
1913 safe_close(s->audit_fd);
1914 safe_close(s->hostname_fd);
1915 safe_close(s->notify_fd);
1916
1917 if (s->rate_limit)
1918 journal_rate_limit_free(s->rate_limit);
1919
1920 if (s->kernel_seqnum)
1921 munmap(s->kernel_seqnum, sizeof(uint64_t));
1922
1923 free(s->buffer);
1924 free(s->tty_path);
1925 free(s->cgroup_root);
1926 free(s->hostname_field);
1927
1928 if (s->mmap)
1929 mmap_cache_unref(s->mmap);
1930
1931 udev_unref(s->udev);
1932 }
1933
1934 static const char* const storage_table[_STORAGE_MAX] = {
1935 [STORAGE_AUTO] = "auto",
1936 [STORAGE_VOLATILE] = "volatile",
1937 [STORAGE_PERSISTENT] = "persistent",
1938 [STORAGE_NONE] = "none"
1939 };
1940
1941 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1942 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1943
1944 static const char* const split_mode_table[_SPLIT_MAX] = {
1945 [SPLIT_LOGIN] = "login",
1946 [SPLIT_UID] = "uid",
1947 [SPLIT_NONE] = "none",
1948 };
1949
1950 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1951 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");