]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
Merge pull request #1880 from fsateler/sysctl-doc
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "fileio.h"
45 #include "formats-util.h"
46 #include "fs-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "io-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "string-table.h"
71 #include "string-util.h"
72
/* Upper bound on the number of per-user journal files kept open at once;
 * find_journal() closes the oldest entries once this is reached. */
73 #define USER_JOURNALS_MAX 1024
74
/* Built-in defaults; presumably applied at server initialisation when the
 * configuration does not override them (not visible in this part of the
 * file -- TODO confirm). */
75 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
76 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
77 #define DEFAULT_RATE_LIMIT_BURST 1000
78 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
79
/* How long a disk space measurement cached by determine_space_for() stays
 * valid before the directory is scanned again. */
80 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
81
/* NOTE(review): by its name a send buffer size for the notification socket;
 * the user is outside this part of the file -- confirm. */
82 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
83
84 static int determine_space_for(
85 Server *s,
86 JournalMetrics *metrics,
87 const char *path,
88 const char *name,
89 bool verbose,
90 bool patch_min_use,
91 uint64_t *available,
92 uint64_t *limit) {
93
94 uint64_t sum = 0, ss_avail, avail;
95 _cleanup_closedir_ DIR *d = NULL;
96 struct dirent *de;
97 struct statvfs ss;
98 const char *p;
99 usec_t ts;
100
101 assert(s);
102 assert(metrics);
103 assert(path);
104 assert(name);
105
106 ts = now(CLOCK_MONOTONIC);
107
108 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
109
110 if (available)
111 *available = s->cached_space_available;
112 if (limit)
113 *limit = s->cached_space_limit;
114
115 return 0;
116 }
117
118 p = strjoina(path, SERVER_MACHINE_ID(s));
119 d = opendir(p);
120 if (!d)
121 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
122
123 if (fstatvfs(dirfd(d), &ss) < 0)
124 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
125
126 FOREACH_DIRENT_ALL(de, d, break) {
127 struct stat st;
128
129 if (!endswith(de->d_name, ".journal") &&
130 !endswith(de->d_name, ".journal~"))
131 continue;
132
133 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
134 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
135 continue;
136 }
137
138 if (!S_ISREG(st.st_mode))
139 continue;
140
141 sum += (uint64_t) st.st_blocks * 512UL;
142 }
143
144 /* If request, then let's bump the min_use limit to the
145 * current usage on disk. We do this when starting up and
146 * first opening the journal files. This way sudden spikes in
147 * disk usage will not cause journald to vacuum files without
148 * bounds. Note that this means that only a restart of
149 * journald will make it reset this value. */
150
151 if (patch_min_use)
152 metrics->min_use = MAX(metrics->min_use, sum);
153
154 ss_avail = ss.f_bsize * ss.f_bavail;
155 avail = LESS_BY(ss_avail, metrics->keep_free);
156
157 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
158 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
159 s->cached_space_timestamp = ts;
160
161 if (verbose) {
162 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
163 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
164
165 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
166 "%s (%s) is currently using %s.\n"
167 "Maximum allowed usage is set to %s.\n"
168 "Leaving at least %s free (of currently available %s of space).\n"
169 "Enforced usage limit is thus %s, of which %s are still available.",
170 name, path,
171 format_bytes(fb1, sizeof(fb1), sum),
172 format_bytes(fb2, sizeof(fb2), metrics->max_use),
173 format_bytes(fb3, sizeof(fb3), metrics->keep_free),
174 format_bytes(fb4, sizeof(fb4), ss_avail),
175 format_bytes(fb5, sizeof(fb5), s->cached_space_limit),
176 format_bytes(fb6, sizeof(fb6), s->cached_space_available));
177 }
178
179 if (available)
180 *available = s->cached_space_available;
181 if (limit)
182 *limit = s->cached_space_limit;
183
184 return 1;
185 }
186
187 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
188 JournalMetrics *metrics;
189 const char *path, *name;
190
191 assert(s);
192
193 if (s->system_journal) {
194 path = "/var/log/journal/";
195 metrics = &s->system_metrics;
196 name = "System journal";
197 } else {
198 path = "/run/log/journal/";
199 metrics = &s->runtime_metrics;
200 name = "Runtime journal";
201 }
202
203 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
204 }
205
206 void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
207 int r;
208 #ifdef HAVE_ACL
209 _cleanup_(acl_freep) acl_t acl = NULL;
210 acl_entry_t entry;
211 acl_permset_t permset;
212 #endif
213
214 assert(f);
215
216 r = fchmod(f->fd, 0640);
217 if (r < 0)
218 log_warning_errno(errno, "Failed to fix access mode on %s, ignoring: %m", f->path);
219
220 #ifdef HAVE_ACL
221 if (uid <= SYSTEM_UID_MAX)
222 return;
223
224 acl = acl_get_fd(f->fd);
225 if (!acl) {
226 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
227 return;
228 }
229
230 r = acl_find_uid(acl, uid, &entry);
231 if (r <= 0) {
232
233 if (acl_create_entry(&acl, &entry) < 0 ||
234 acl_set_tag_type(entry, ACL_USER) < 0 ||
235 acl_set_qualifier(entry, &uid) < 0) {
236 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
237 return;
238 }
239 }
240
241 /* We do not recalculate the mask unconditionally here,
242 * so that the fchmod() mask above stays intact. */
243 if (acl_get_permset(entry, &permset) < 0 ||
244 acl_add_perm(permset, ACL_READ) < 0) {
245 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
246 return;
247 }
248
249 r = calc_acl_mask_if_needed(&acl);
250 if (r < 0) {
251 log_warning_errno(r, "Failed to patch ACL on %s, ignoring: %m", f->path);
252 return;
253 }
254
255 if (acl_set_fd(f->fd, acl) < 0)
256 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
257
258 #endif
259 }
260
261 static JournalFile* find_journal(Server *s, uid_t uid) {
262 _cleanup_free_ char *p = NULL;
263 int r;
264 JournalFile *f;
265 sd_id128_t machine;
266
267 assert(s);
268
269 /* We split up user logs only on /var, not on /run. If the
270 * runtime file is open, we write to it exclusively, in order
271 * to guarantee proper order as soon as we flush /run to
272 * /var and close the runtime file. */
273
274 if (s->runtime_journal)
275 return s->runtime_journal;
276
277 if (uid <= SYSTEM_UID_MAX)
278 return s->system_journal;
279
280 r = sd_id128_get_machine(&machine);
281 if (r < 0)
282 return s->system_journal;
283
284 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
285 if (f)
286 return f;
287
288 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
289 SD_ID128_FORMAT_VAL(machine), uid) < 0)
290 return s->system_journal;
291
292 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
293 /* Too many open? Then let's close one */
294 f = ordered_hashmap_steal_first(s->user_journals);
295 assert(f);
296 journal_file_close(f);
297 }
298
299 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
300 if (r < 0)
301 return s->system_journal;
302
303 server_fix_perms(s, f, uid);
304
305 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
306 if (r < 0) {
307 journal_file_close(f);
308 return s->system_journal;
309 }
310
311 return f;
312 }
313
314 static int do_rotate(
315 Server *s,
316 JournalFile **f,
317 const char* name,
318 bool seal,
319 uint32_t uid) {
320
321 int r;
322 assert(s);
323
324 if (!*f)
325 return -EINVAL;
326
327 r = journal_file_rotate(f, s->compress, seal);
328 if (r < 0)
329 if (*f)
330 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
331 else
332 log_error_errno(r, "Failed to create new %s journal: %m", name);
333 else
334 server_fix_perms(s, *f, uid);
335
336 return r;
337 }
338
339 void server_rotate(Server *s) {
340 JournalFile *f;
341 void *k;
342 Iterator i;
343 int r;
344
345 log_debug("Rotating...");
346
347 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
348 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
349
350 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
351 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
352 if (r >= 0)
353 ordered_hashmap_replace(s->user_journals, k, f);
354 else if (!f)
355 /* Old file has been closed and deallocated */
356 ordered_hashmap_remove(s->user_journals, k);
357 }
358 }
359
360 void server_sync(Server *s) {
361 JournalFile *f;
362 void *k;
363 Iterator i;
364 int r;
365
366 if (s->system_journal) {
367 r = journal_file_set_offline(s->system_journal);
368 if (r < 0)
369 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
370 }
371
372 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
373 r = journal_file_set_offline(f);
374 if (r < 0)
375 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
376 }
377
378 if (s->sync_event_source) {
379 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
380 if (r < 0)
381 log_error_errno(r, "Failed to disable sync timer source: %m");
382 }
383
384 s->sync_scheduled = false;
385 }
386
387 static void do_vacuum(
388 Server *s,
389 JournalFile *f,
390 JournalMetrics *metrics,
391 const char *path,
392 const char *name,
393 bool verbose,
394 bool patch_min_use) {
395
396 const char *p;
397 uint64_t limit;
398 int r;
399
400 assert(s);
401 assert(metrics);
402 assert(path);
403 assert(name);
404
405 if (!f)
406 return;
407
408 p = strjoina(path, SERVER_MACHINE_ID(s));
409
410 limit = metrics->max_use;
411 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
412
413 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
414 if (r < 0 && r != -ENOENT)
415 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
416 }
417
418 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
419 assert(s);
420
421 log_debug("Vacuuming...");
422
423 s->oldest_file_usec = 0;
424
425 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
426 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
427
428 s->cached_space_limit = 0;
429 s->cached_space_available = 0;
430 s->cached_space_timestamp = 0;
431
432 return 0;
433 }
434
435 static void server_cache_machine_id(Server *s) {
436 sd_id128_t id;
437 int r;
438
439 assert(s);
440
441 r = sd_id128_get_machine(&id);
442 if (r < 0)
443 return;
444
445 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
446 }
447
448 static void server_cache_boot_id(Server *s) {
449 sd_id128_t id;
450 int r;
451
452 assert(s);
453
454 r = sd_id128_get_boot(&id);
455 if (r < 0)
456 return;
457
458 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
459 }
460
461 static void server_cache_hostname(Server *s) {
462 _cleanup_free_ char *t = NULL;
463 char *x;
464
465 assert(s);
466
467 t = gethostname_malloc();
468 if (!t)
469 return;
470
471 x = strappend("_HOSTNAME=", t);
472 if (!x)
473 return;
474
475 free(s->hostname_field);
476 s->hostname_field = x;
477 }
478
479 static bool shall_try_append_again(JournalFile *f, int r) {
480
481 /* -E2BIG Hit configured limit
482 -EFBIG Hit fs limit
483 -EDQUOT Quota limit hit
484 -ENOSPC Disk full
485 -EIO I/O error of some kind (mmap)
486 -EHOSTDOWN Other machine
487 -EBUSY Unclean shutdown
488 -EPROTONOSUPPORT Unsupported feature
489 -EBADMSG Corrupted
490 -ENODATA Truncated
491 -ESHUTDOWN Already archived
492 -EIDRM Journal file has been deleted */
493
494 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
495 log_debug("%s: Allocation limit reached, rotating.", f->path);
496 else if (r == -EHOSTDOWN)
497 log_info("%s: Journal file from other machine, rotating.", f->path);
498 else if (r == -EBUSY)
499 log_info("%s: Unclean shutdown, rotating.", f->path);
500 else if (r == -EPROTONOSUPPORT)
501 log_info("%s: Unsupported feature, rotating.", f->path);
502 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
503 log_warning("%s: Journal file corrupted, rotating.", f->path);
504 else if (r == -EIO)
505 log_warning("%s: IO error, rotating.", f->path);
506 else if (r == -EIDRM)
507 log_warning("%s: Journal file has been deleted, rotating.", f->path);
508 else
509 return false;
510
511 return true;
512 }
513
514 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
515 JournalFile *f;
516 bool vacuumed = false;
517 int r;
518
519 assert(s);
520 assert(iovec);
521 assert(n > 0);
522
523 f = find_journal(s, uid);
524 if (!f)
525 return;
526
527 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
528 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
529 server_rotate(s);
530 server_vacuum(s, false, false);
531 vacuumed = true;
532
533 f = find_journal(s, uid);
534 if (!f)
535 return;
536 }
537
538 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
539 if (r >= 0) {
540 server_schedule_sync(s, priority);
541 return;
542 }
543
544 if (vacuumed || !shall_try_append_again(f, r)) {
545 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
546 return;
547 }
548
549 server_rotate(s);
550 server_vacuum(s, false, false);
551
552 f = find_journal(s, uid);
553 if (!f)
554 return;
555
556 log_debug("Retrying write.");
557 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
558 if (r < 0)
559 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
560 else
561 server_schedule_sync(s, priority);
562 }
563
564 static void dispatch_message_real(
565 Server *s,
566 struct iovec *iovec, unsigned n, unsigned m,
567 const struct ucred *ucred,
568 const struct timeval *tv,
569 const char *label, size_t label_len,
570 const char *unit_id,
571 int priority,
572 pid_t object_pid) {
573
574 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
575 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
576 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
577 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
578 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
579 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
580 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
581 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
582 uid_t object_uid;
583 gid_t object_gid;
584 char *x;
585 int r;
586 char *t, *c;
587 uid_t realuid = 0, owner = 0, journal_uid;
588 bool owner_valid = false;
589 #ifdef HAVE_AUDIT
590 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
591 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
592 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
593 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
594
595 uint32_t audit;
596 uid_t loginuid;
597 #endif
598
599 assert(s);
600 assert(iovec);
601 assert(n > 0);
602 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
603
604 if (ucred) {
605 realuid = ucred->uid;
606
607 sprintf(pid, "_PID="PID_FMT, ucred->pid);
608 IOVEC_SET_STRING(iovec[n++], pid);
609
610 sprintf(uid, "_UID="UID_FMT, ucred->uid);
611 IOVEC_SET_STRING(iovec[n++], uid);
612
613 sprintf(gid, "_GID="GID_FMT, ucred->gid);
614 IOVEC_SET_STRING(iovec[n++], gid);
615
616 r = get_process_comm(ucred->pid, &t);
617 if (r >= 0) {
618 x = strjoina("_COMM=", t);
619 free(t);
620 IOVEC_SET_STRING(iovec[n++], x);
621 }
622
623 r = get_process_exe(ucred->pid, &t);
624 if (r >= 0) {
625 x = strjoina("_EXE=", t);
626 free(t);
627 IOVEC_SET_STRING(iovec[n++], x);
628 }
629
630 r = get_process_cmdline(ucred->pid, 0, false, &t);
631 if (r >= 0) {
632 x = strjoina("_CMDLINE=", t);
633 free(t);
634 IOVEC_SET_STRING(iovec[n++], x);
635 }
636
637 r = get_process_capeff(ucred->pid, &t);
638 if (r >= 0) {
639 x = strjoina("_CAP_EFFECTIVE=", t);
640 free(t);
641 IOVEC_SET_STRING(iovec[n++], x);
642 }
643
644 #ifdef HAVE_AUDIT
645 r = audit_session_from_pid(ucred->pid, &audit);
646 if (r >= 0) {
647 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
648 IOVEC_SET_STRING(iovec[n++], audit_session);
649 }
650
651 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
652 if (r >= 0) {
653 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
654 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
655 }
656 #endif
657
658 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
659 if (r >= 0) {
660 char *session = NULL;
661
662 x = strjoina("_SYSTEMD_CGROUP=", c);
663 IOVEC_SET_STRING(iovec[n++], x);
664
665 r = cg_path_get_session(c, &t);
666 if (r >= 0) {
667 session = strjoina("_SYSTEMD_SESSION=", t);
668 free(t);
669 IOVEC_SET_STRING(iovec[n++], session);
670 }
671
672 if (cg_path_get_owner_uid(c, &owner) >= 0) {
673 owner_valid = true;
674
675 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
676 IOVEC_SET_STRING(iovec[n++], owner_uid);
677 }
678
679 if (cg_path_get_unit(c, &t) >= 0) {
680 x = strjoina("_SYSTEMD_UNIT=", t);
681 free(t);
682 IOVEC_SET_STRING(iovec[n++], x);
683 } else if (unit_id && !session) {
684 x = strjoina("_SYSTEMD_UNIT=", unit_id);
685 IOVEC_SET_STRING(iovec[n++], x);
686 }
687
688 if (cg_path_get_user_unit(c, &t) >= 0) {
689 x = strjoina("_SYSTEMD_USER_UNIT=", t);
690 free(t);
691 IOVEC_SET_STRING(iovec[n++], x);
692 } else if (unit_id && session) {
693 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
694 IOVEC_SET_STRING(iovec[n++], x);
695 }
696
697 if (cg_path_get_slice(c, &t) >= 0) {
698 x = strjoina("_SYSTEMD_SLICE=", t);
699 free(t);
700 IOVEC_SET_STRING(iovec[n++], x);
701 }
702
703 free(c);
704 } else if (unit_id) {
705 x = strjoina("_SYSTEMD_UNIT=", unit_id);
706 IOVEC_SET_STRING(iovec[n++], x);
707 }
708
709 #ifdef HAVE_SELINUX
710 if (mac_selinux_use()) {
711 if (label) {
712 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
713
714 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
715 IOVEC_SET_STRING(iovec[n++], x);
716 } else {
717 security_context_t con;
718
719 if (getpidcon(ucred->pid, &con) >= 0) {
720 x = strjoina("_SELINUX_CONTEXT=", con);
721
722 freecon(con);
723 IOVEC_SET_STRING(iovec[n++], x);
724 }
725 }
726 }
727 #endif
728 }
729 assert(n <= m);
730
731 if (object_pid) {
732 r = get_process_uid(object_pid, &object_uid);
733 if (r >= 0) {
734 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
735 IOVEC_SET_STRING(iovec[n++], o_uid);
736 }
737
738 r = get_process_gid(object_pid, &object_gid);
739 if (r >= 0) {
740 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
741 IOVEC_SET_STRING(iovec[n++], o_gid);
742 }
743
744 r = get_process_comm(object_pid, &t);
745 if (r >= 0) {
746 x = strjoina("OBJECT_COMM=", t);
747 free(t);
748 IOVEC_SET_STRING(iovec[n++], x);
749 }
750
751 r = get_process_exe(object_pid, &t);
752 if (r >= 0) {
753 x = strjoina("OBJECT_EXE=", t);
754 free(t);
755 IOVEC_SET_STRING(iovec[n++], x);
756 }
757
758 r = get_process_cmdline(object_pid, 0, false, &t);
759 if (r >= 0) {
760 x = strjoina("OBJECT_CMDLINE=", t);
761 free(t);
762 IOVEC_SET_STRING(iovec[n++], x);
763 }
764
765 #ifdef HAVE_AUDIT
766 r = audit_session_from_pid(object_pid, &audit);
767 if (r >= 0) {
768 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
769 IOVEC_SET_STRING(iovec[n++], o_audit_session);
770 }
771
772 r = audit_loginuid_from_pid(object_pid, &loginuid);
773 if (r >= 0) {
774 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
775 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
776 }
777 #endif
778
779 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
780 if (r >= 0) {
781 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
782 IOVEC_SET_STRING(iovec[n++], x);
783
784 r = cg_path_get_session(c, &t);
785 if (r >= 0) {
786 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
787 free(t);
788 IOVEC_SET_STRING(iovec[n++], x);
789 }
790
791 if (cg_path_get_owner_uid(c, &owner) >= 0) {
792 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
793 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
794 }
795
796 if (cg_path_get_unit(c, &t) >= 0) {
797 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
798 free(t);
799 IOVEC_SET_STRING(iovec[n++], x);
800 }
801
802 if (cg_path_get_user_unit(c, &t) >= 0) {
803 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
804 free(t);
805 IOVEC_SET_STRING(iovec[n++], x);
806 }
807
808 free(c);
809 }
810 }
811 assert(n <= m);
812
813 if (tv) {
814 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
815 IOVEC_SET_STRING(iovec[n++], source_time);
816 }
817
818 /* Note that strictly speaking storing the boot id here is
819 * redundant since the entry includes this in-line
820 * anyway. However, we need this indexed, too. */
821 if (!isempty(s->boot_id_field))
822 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
823
824 if (!isempty(s->machine_id_field))
825 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
826
827 if (!isempty(s->hostname_field))
828 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
829
830 assert(n <= m);
831
832 if (s->split_mode == SPLIT_UID && realuid > 0)
833 /* Split up strictly by any UID */
834 journal_uid = realuid;
835 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
836 /* Split up by login UIDs. We do this only if the
837 * realuid is not root, in order not to accidentally
838 * leak privileged information to the user that is
839 * logged by a privileged process that is part of an
840 * unprivileged session. */
841 journal_uid = owner;
842 else
843 journal_uid = 0;
844
845 write_to_journal(s, journal_uid, iovec, n, priority);
846 }
847
848 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
849 char mid[11 + 32 + 1];
850 char buffer[16 + LINE_MAX + 1];
851 struct iovec iovec[N_IOVEC_META_FIELDS + 6];
852 int n = 0;
853 va_list ap;
854 struct ucred ucred = {};
855
856 assert(s);
857 assert(format);
858
859 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
860 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
861
862 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
863 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
864
865 memcpy(buffer, "MESSAGE=", 8);
866 va_start(ap, format);
867 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
868 va_end(ap);
869 IOVEC_SET_STRING(iovec[n++], buffer);
870
871 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
872 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
873 IOVEC_SET_STRING(iovec[n++], mid);
874 }
875
876 ucred.pid = getpid();
877 ucred.uid = getuid();
878 ucred.gid = getgid();
879
880 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
881 }
882
883 void server_dispatch_message(
884 Server *s,
885 struct iovec *iovec, unsigned n, unsigned m,
886 const struct ucred *ucred,
887 const struct timeval *tv,
888 const char *label, size_t label_len,
889 const char *unit_id,
890 int priority,
891 pid_t object_pid) {
892
893 int rl, r;
894 _cleanup_free_ char *path = NULL;
895 uint64_t available = 0;
896 char *c;
897
898 assert(s);
899 assert(iovec || n == 0);
900
901 if (n == 0)
902 return;
903
904 if (LOG_PRI(priority) > s->max_level_store)
905 return;
906
907 /* Stop early in case the information will not be stored
908 * in a journal. */
909 if (s->storage == STORAGE_NONE)
910 return;
911
912 if (!ucred)
913 goto finish;
914
915 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
916 if (r < 0)
917 goto finish;
918
919 /* example: /user/lennart/3/foobar
920 * /system/dbus.service/foobar
921 *
922 * So let's cut of everything past the third /, since that is
923 * where user directories start */
924
925 c = strchr(path, '/');
926 if (c) {
927 c = strchr(c+1, '/');
928 if (c) {
929 c = strchr(c+1, '/');
930 if (c)
931 *c = 0;
932 }
933 }
934
935 (void) determine_space(s, false, false, &available, NULL);
936 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
937 if (rl == 0)
938 return;
939
940 /* Write a suppression message if we suppressed something */
941 if (rl > 1)
942 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
943 "Suppressed %u messages from %s", rl - 1, path);
944
945 finish:
946 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
947 }
948
949
950 static int system_journal_open(Server *s, bool flush_requested) {
951 const char *fn;
952 int r = 0;
953
954 if (!s->system_journal &&
955 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
956 (flush_requested
957 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
958
959 /* If in auto mode: first try to create the machine
960 * path, but not the prefix.
961 *
962 * If in persistent mode: create /var/log/journal and
963 * the machine path */
964
965 if (s->storage == STORAGE_PERSISTENT)
966 (void) mkdir_p("/var/log/journal/", 0755);
967
968 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
969 (void) mkdir(fn, 0755);
970
971 fn = strjoina(fn, "/system.journal");
972 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
973 if (r >= 0) {
974 server_fix_perms(s, s->system_journal, 0);
975 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
976 } else if (r < 0) {
977 if (r != -ENOENT && r != -EROFS)
978 log_warning_errno(r, "Failed to open system journal: %m");
979
980 r = 0;
981 }
982 }
983
984 if (!s->runtime_journal &&
985 (s->storage != STORAGE_NONE)) {
986
987 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
988
989 if (s->system_journal) {
990
991 /* Try to open the runtime journal, but only
992 * if it already exists, so that we can flush
993 * it into the system journal */
994
995 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
996 if (r < 0) {
997 if (r != -ENOENT)
998 log_warning_errno(r, "Failed to open runtime journal: %m");
999
1000 r = 0;
1001 }
1002
1003 } else {
1004
1005 /* OK, we really need the runtime journal, so create
1006 * it if necessary. */
1007
1008 (void) mkdir("/run/log", 0755);
1009 (void) mkdir("/run/log/journal", 0755);
1010 (void) mkdir_parents(fn, 0750);
1011
1012 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1013 if (r < 0)
1014 return log_error_errno(r, "Failed to open runtime journal: %m");
1015 }
1016
1017 if (s->runtime_journal) {
1018 server_fix_perms(s, s->runtime_journal, 0);
1019 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1020 }
1021 }
1022
1023 return r;
1024 }
1025
1026 int server_flush_to_var(Server *s) {
1027 sd_id128_t machine;
1028 sd_journal *j = NULL;
1029 char ts[FORMAT_TIMESPAN_MAX];
1030 usec_t start;
1031 unsigned n = 0;
1032 int r;
1033
1034 assert(s);
1035
1036 if (s->storage != STORAGE_AUTO &&
1037 s->storage != STORAGE_PERSISTENT)
1038 return 0;
1039
1040 if (!s->runtime_journal)
1041 return 0;
1042
1043 (void) system_journal_open(s, true);
1044
1045 if (!s->system_journal)
1046 return 0;
1047
1048 log_debug("Flushing to /var...");
1049
1050 start = now(CLOCK_MONOTONIC);
1051
1052 r = sd_id128_get_machine(&machine);
1053 if (r < 0)
1054 return r;
1055
1056 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1057 if (r < 0)
1058 return log_error_errno(r, "Failed to read runtime journal: %m");
1059
1060 sd_journal_set_data_threshold(j, 0);
1061
1062 SD_JOURNAL_FOREACH(j) {
1063 Object *o = NULL;
1064 JournalFile *f;
1065
1066 f = j->current_file;
1067 assert(f && f->current_offset > 0);
1068
1069 n++;
1070
1071 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1072 if (r < 0) {
1073 log_error_errno(r, "Can't read entry: %m");
1074 goto finish;
1075 }
1076
1077 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1078 if (r >= 0)
1079 continue;
1080
1081 if (!shall_try_append_again(s->system_journal, r)) {
1082 log_error_errno(r, "Can't write entry: %m");
1083 goto finish;
1084 }
1085
1086 server_rotate(s);
1087 server_vacuum(s, false, false);
1088
1089 if (!s->system_journal) {
1090 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1091 r = -EIO;
1092 goto finish;
1093 }
1094
1095 log_debug("Retrying write.");
1096 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1097 if (r < 0) {
1098 log_error_errno(r, "Can't write entry: %m");
1099 goto finish;
1100 }
1101 }
1102
1103 r = 0;
1104
1105 finish:
1106 journal_file_post_change(s->system_journal);
1107
1108 s->runtime_journal = journal_file_close(s->runtime_journal);
1109
1110 if (r >= 0)
1111 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1112
1113 sd_journal_close(j);
1114
1115 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1116
1117 return r;
1118 }
1119
1120 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1121 Server *s = userdata;
1122 struct ucred *ucred = NULL;
1123 struct timeval *tv = NULL;
1124 struct cmsghdr *cmsg;
1125 char *label = NULL;
1126 size_t label_len = 0, m;
1127 struct iovec iovec;
1128 ssize_t n;
1129 int *fds = NULL, v = 0;
1130 unsigned n_fds = 0;
1131
1132 union {
1133 struct cmsghdr cmsghdr;
1134
1135 /* We use NAME_MAX space for the SELinux label
1136 * here. The kernel currently enforces no
1137 * limit, but according to suggestions from
1138 * the SELinux people this will change and it
1139 * will probably be identical to NAME_MAX. For
1140 * now we use that, but this should be updated
1141 * one day when the final limit is known. */
1142 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1143 CMSG_SPACE(sizeof(struct timeval)) +
1144 CMSG_SPACE(sizeof(int)) + /* fd */
1145 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1146 } control = {};
1147
1148 union sockaddr_union sa = {};
1149
1150 struct msghdr msghdr = {
1151 .msg_iov = &iovec,
1152 .msg_iovlen = 1,
1153 .msg_control = &control,
1154 .msg_controllen = sizeof(control),
1155 .msg_name = &sa,
1156 .msg_namelen = sizeof(sa),
1157 };
1158
1159 assert(s);
1160 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1161
1162 if (revents != EPOLLIN) {
1163 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1164 return -EIO;
1165 }
1166
1167 /* Try to get the right size, if we can. (Not all
1168 * sockets support SIOCINQ, hence we just try, but
1169 * don't rely on it. */
1170 (void) ioctl(fd, SIOCINQ, &v);
1171
1172 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1173 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1174 (size_t) LINE_MAX,
1175 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1176
1177 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1178 return log_oom();
1179
1180 iovec.iov_base = s->buffer;
1181 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1182
1183 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1184 if (n < 0) {
1185 if (errno == EINTR || errno == EAGAIN)
1186 return 0;
1187
1188 return log_error_errno(errno, "recvmsg() failed: %m");
1189 }
1190
1191 CMSG_FOREACH(cmsg, &msghdr) {
1192
1193 if (cmsg->cmsg_level == SOL_SOCKET &&
1194 cmsg->cmsg_type == SCM_CREDENTIALS &&
1195 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1196 ucred = (struct ucred*) CMSG_DATA(cmsg);
1197 else if (cmsg->cmsg_level == SOL_SOCKET &&
1198 cmsg->cmsg_type == SCM_SECURITY) {
1199 label = (char*) CMSG_DATA(cmsg);
1200 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1201 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1202 cmsg->cmsg_type == SO_TIMESTAMP &&
1203 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1204 tv = (struct timeval*) CMSG_DATA(cmsg);
1205 else if (cmsg->cmsg_level == SOL_SOCKET &&
1206 cmsg->cmsg_type == SCM_RIGHTS) {
1207 fds = (int*) CMSG_DATA(cmsg);
1208 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1209 }
1210 }
1211
1212 /* And a trailing NUL, just in case */
1213 s->buffer[n] = 0;
1214
1215 if (fd == s->syslog_fd) {
1216 if (n > 0 && n_fds == 0)
1217 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1218 else if (n_fds > 0)
1219 log_warning("Got file descriptors via syslog socket. Ignoring.");
1220
1221 } else if (fd == s->native_fd) {
1222 if (n > 0 && n_fds == 0)
1223 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1224 else if (n == 0 && n_fds == 1)
1225 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1226 else if (n_fds > 0)
1227 log_warning("Got too many file descriptors via native socket. Ignoring.");
1228
1229 } else {
1230 assert(fd == s->audit_fd);
1231
1232 if (n > 0 && n_fds == 0)
1233 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1234 else if (n_fds > 0)
1235 log_warning("Got file descriptors via audit socket. Ignoring.");
1236 }
1237
1238 close_many(fds, n_fds);
1239 return 0;
1240 }
1241
1242 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1243 Server *s = userdata;
1244 int r;
1245
1246 assert(s);
1247
1248 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1249
1250 server_flush_to_var(s);
1251 server_sync(s);
1252 server_vacuum(s, false, false);
1253
1254 r = touch("/run/systemd/journal/flushed");
1255 if (r < 0)
1256 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1257
1258 return 0;
1259 }
1260
1261 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1262 Server *s = userdata;
1263 int r;
1264
1265 assert(s);
1266
1267 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1268 server_rotate(s);
1269 server_vacuum(s, true, true);
1270
1271 /* Let clients know when the most recent rotation happened. */
1272 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1273 if (r < 0)
1274 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1275
1276 return 0;
1277 }
1278
1279 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1280 Server *s = userdata;
1281
1282 assert(s);
1283
1284 log_received_signal(LOG_INFO, si);
1285
1286 sd_event_exit(s->event, 0);
1287 return 0;
1288 }
1289
1290 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1291 Server *s = userdata;
1292 int r;
1293
1294 assert(s);
1295
1296 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1297
1298 server_sync(s);
1299
1300 /* Let clients know when the most recent sync happened. */
1301 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1302 if (r < 0)
1303 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1304
1305 return 0;
1306 }
1307
1308 static int setup_signals(Server *s) {
1309 int r;
1310
1311 assert(s);
1312
1313 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1314
1315 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1316 if (r < 0)
1317 return r;
1318
1319 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1320 if (r < 0)
1321 return r;
1322
1323 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1324 if (r < 0)
1325 return r;
1326
1327 /* Let's process SIGTERM late, so that we flush all queued
1328 * messages to disk before we exit */
1329 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1330 if (r < 0)
1331 return r;
1332
1333 /* When journald is invoked on the terminal (when debugging),
1334 * it's useful if C-c is handled equivalent to SIGTERM. */
1335 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1336 if (r < 0)
1337 return r;
1338
1339 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1340 if (r < 0)
1341 return r;
1342
1343 /* SIGRTMIN+1 causes an immediate sync. We process this very
1344 * late, so that everything else queued at this point is
1345 * really written to disk. Clients can watch
1346 * /run/systemd/journal/synced with inotify until its mtime
1347 * changes to see when a sync happened. */
1348 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1349 if (r < 0)
1350 return r;
1351
1352 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1353 if (r < 0)
1354 return r;
1355
1356 return 0;
1357 }
1358
1359 static int server_parse_proc_cmdline(Server *s) {
1360 _cleanup_free_ char *line = NULL;
1361 const char *p;
1362 int r;
1363
1364 r = proc_cmdline(&line);
1365 if (r < 0) {
1366 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1367 return 0;
1368 }
1369
1370 p = line;
1371 for(;;) {
1372 _cleanup_free_ char *word;
1373
1374 r = extract_first_word(&p, &word, NULL, 0);
1375 if (r < 0)
1376 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1377
1378 if (r == 0)
1379 break;
1380
1381 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1382 r = parse_boolean(word + 35);
1383 if (r < 0)
1384 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1385 else
1386 s->forward_to_syslog = r;
1387 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1388 r = parse_boolean(word + 33);
1389 if (r < 0)
1390 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1391 else
1392 s->forward_to_kmsg = r;
1393 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1394 r = parse_boolean(word + 36);
1395 if (r < 0)
1396 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1397 else
1398 s->forward_to_console = r;
1399 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1400 r = parse_boolean(word + 33);
1401 if (r < 0)
1402 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1403 else
1404 s->forward_to_wall = r;
1405 } else if (startswith(word, "systemd.journald"))
1406 log_warning("Invalid systemd.journald parameter. Ignoring.");
1407 }
1408
1409 /* do not warn about state here, since probably systemd already did */
1410 return 0;
1411 }
1412
1413 static int server_parse_config_file(Server *s) {
1414 assert(s);
1415
1416 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1417 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1418 "Journal\0",
1419 config_item_perf_lookup, journald_gperf_lookup,
1420 false, s);
1421 }
1422
1423 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1424 Server *s = userdata;
1425
1426 assert(s);
1427
1428 server_sync(s);
1429 return 0;
1430 }
1431
1432 int server_schedule_sync(Server *s, int priority) {
1433 int r;
1434
1435 assert(s);
1436
1437 if (priority <= LOG_CRIT) {
1438 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1439 server_sync(s);
1440 return 0;
1441 }
1442
1443 if (s->sync_scheduled)
1444 return 0;
1445
1446 if (s->sync_interval_usec > 0) {
1447 usec_t when;
1448
1449 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1450 if (r < 0)
1451 return r;
1452
1453 when += s->sync_interval_usec;
1454
1455 if (!s->sync_event_source) {
1456 r = sd_event_add_time(
1457 s->event,
1458 &s->sync_event_source,
1459 CLOCK_MONOTONIC,
1460 when, 0,
1461 server_dispatch_sync, s);
1462 if (r < 0)
1463 return r;
1464
1465 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1466 } else {
1467 r = sd_event_source_set_time(s->sync_event_source, when);
1468 if (r < 0)
1469 return r;
1470
1471 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1472 }
1473 if (r < 0)
1474 return r;
1475
1476 s->sync_scheduled = true;
1477 }
1478
1479 return 0;
1480 }
1481
1482 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1483 Server *s = userdata;
1484
1485 assert(s);
1486
1487 server_cache_hostname(s);
1488 return 0;
1489 }
1490
1491 static int server_open_hostname(Server *s) {
1492 int r;
1493
1494 assert(s);
1495
1496 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1497 if (s->hostname_fd < 0)
1498 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1499
1500 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1501 if (r < 0) {
1502 /* kernels prior to 3.2 don't support polling this file. Ignore
1503 * the failure. */
1504 if (r == -EPERM) {
1505 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1506 s->hostname_fd = safe_close(s->hostname_fd);
1507 return 0;
1508 }
1509
1510 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1511 }
1512
1513 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1514 if (r < 0)
1515 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1516
1517 return 0;
1518 }
1519
1520 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1521 Server *s = userdata;
1522 int r;
1523
1524 assert(s);
1525 assert(s->notify_event_source == es);
1526 assert(s->notify_fd == fd);
1527
1528 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1529 * message on it. Either it's the wtachdog event, the initial
1530 * READY=1 event or an stdout stream event. If there's nothing
1531 * to write anymore, turn our event source off. The next time
1532 * there's something to send it will be turned on again. */
1533
1534 if (!s->sent_notify_ready) {
1535 static const char p[] =
1536 "READY=1\n"
1537 "STATUS=Processing requests...";
1538 ssize_t l;
1539
1540 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1541 if (l < 0) {
1542 if (errno == EAGAIN)
1543 return 0;
1544
1545 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1546 }
1547
1548 s->sent_notify_ready = true;
1549 log_debug("Sent READY=1 notification.");
1550
1551 } else if (s->send_watchdog) {
1552
1553 static const char p[] =
1554 "WATCHDOG=1";
1555
1556 ssize_t l;
1557
1558 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1559 if (l < 0) {
1560 if (errno == EAGAIN)
1561 return 0;
1562
1563 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1564 }
1565
1566 s->send_watchdog = false;
1567 log_debug("Sent WATCHDOG=1 notification.");
1568
1569 } else if (s->stdout_streams_notify_queue)
1570 /* Dispatch one stream notification event */
1571 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1572
1573 /* Leave us enabled if there's still more to to do. */
1574 if (s->send_watchdog || s->stdout_streams_notify_queue)
1575 return 0;
1576
1577 /* There was nothing to do anymore, let's turn ourselves off. */
1578 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1579 if (r < 0)
1580 return log_error_errno(r, "Failed to turn off notify event source: %m");
1581
1582 return 0;
1583 }
1584
1585 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1586 Server *s = userdata;
1587 int r;
1588
1589 assert(s);
1590
1591 s->send_watchdog = true;
1592
1593 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1594 if (r < 0)
1595 log_warning_errno(r, "Failed to turn on notify event source: %m");
1596
1597 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1598 if (r < 0)
1599 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1600
1601 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1602 if (r < 0)
1603 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1604
1605 return 0;
1606 }
1607
1608 static int server_connect_notify(Server *s) {
1609 union sockaddr_union sa = {
1610 .un.sun_family = AF_UNIX,
1611 };
1612 const char *e;
1613 int r;
1614
1615 assert(s);
1616 assert(s->notify_fd < 0);
1617 assert(!s->notify_event_source);
1618
1619 /*
1620 So here's the problem: we'd like to send notification
1621 messages to PID 1, but we cannot do that via sd_notify(),
1622 since that's synchronous, and we might end up blocking on
1623 it. Specifically: given that PID 1 might block on
1624 dbus-daemon during IPC, and dbus-daemon is logging to us,
1625 and might hence block on us, we might end up in a deadlock
1626 if we block on sending PID 1 notification messages -- by
1627 generating a full blocking circle. To avoid this, let's
1628 create a non-blocking socket, and connect it to the
1629 notification socket, and then wait for POLLOUT before we
1630 send anything. This should efficiently avoid any deadlocks,
1631 as we'll never block on PID 1, hence PID 1 can safely block
1632 on dbus-daemon which can safely block on us again.
1633
1634 Don't think that this issue is real? It is, see:
1635 https://github.com/systemd/systemd/issues/1505
1636 */
1637
1638 e = getenv("NOTIFY_SOCKET");
1639 if (!e)
1640 return 0;
1641
1642 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1643 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1644 return -EINVAL;
1645 }
1646
1647 if (strlen(e) > sizeof(sa.un.sun_path)) {
1648 log_error("NOTIFY_SOCKET path too long: %s", e);
1649 return -EINVAL;
1650 }
1651
1652 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1653 if (s->notify_fd < 0)
1654 return log_error_errno(errno, "Failed to create notify socket: %m");
1655
1656 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1657
1658 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1659 if (sa.un.sun_path[0] == '@')
1660 sa.un.sun_path[0] = 0;
1661
1662 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1663 if (r < 0)
1664 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1665
1666 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1667 if (r < 0)
1668 return log_error_errno(r, "Failed to watch notification socket: %m");
1669
1670 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1671 s->send_watchdog = true;
1672
1673 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1674 if (r < 0)
1675 return log_error_errno(r, "Failed to add watchdog time event: %m");
1676 }
1677
1678 /* This should fire pretty soon, which we'll use to send the
1679 * READY=1 event. */
1680
1681 return 0;
1682 }
1683
1684 int server_init(Server *s) {
1685 _cleanup_fdset_free_ FDSet *fds = NULL;
1686 int n, r, fd;
1687 bool no_sockets;
1688
1689 assert(s);
1690
1691 zero(*s);
1692 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1693 s->compress = true;
1694 s->seal = true;
1695
1696 s->watchdog_usec = USEC_INFINITY;
1697
1698 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1699 s->sync_scheduled = false;
1700
1701 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1702 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1703
1704 s->forward_to_wall = true;
1705
1706 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1707
1708 s->max_level_store = LOG_DEBUG;
1709 s->max_level_syslog = LOG_DEBUG;
1710 s->max_level_kmsg = LOG_NOTICE;
1711 s->max_level_console = LOG_INFO;
1712 s->max_level_wall = LOG_EMERG;
1713
1714 journal_reset_metrics(&s->system_metrics);
1715 journal_reset_metrics(&s->runtime_metrics);
1716
1717 server_parse_config_file(s);
1718 server_parse_proc_cmdline(s);
1719
1720 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1721 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1722 s->rate_limit_interval, s->rate_limit_burst);
1723 s->rate_limit_interval = s->rate_limit_burst = 0;
1724 }
1725
1726 (void) mkdir_p("/run/systemd/journal", 0755);
1727
1728 s->user_journals = ordered_hashmap_new(NULL);
1729 if (!s->user_journals)
1730 return log_oom();
1731
1732 s->mmap = mmap_cache_new();
1733 if (!s->mmap)
1734 return log_oom();
1735
1736 r = sd_event_default(&s->event);
1737 if (r < 0)
1738 return log_error_errno(r, "Failed to create event loop: %m");
1739
1740 n = sd_listen_fds(true);
1741 if (n < 0)
1742 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1743
1744 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1745
1746 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1747
1748 if (s->native_fd >= 0) {
1749 log_error("Too many native sockets passed.");
1750 return -EINVAL;
1751 }
1752
1753 s->native_fd = fd;
1754
1755 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1756
1757 if (s->stdout_fd >= 0) {
1758 log_error("Too many stdout sockets passed.");
1759 return -EINVAL;
1760 }
1761
1762 s->stdout_fd = fd;
1763
1764 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1765 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1766
1767 if (s->syslog_fd >= 0) {
1768 log_error("Too many /dev/log sockets passed.");
1769 return -EINVAL;
1770 }
1771
1772 s->syslog_fd = fd;
1773
1774 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1775
1776 if (s->audit_fd >= 0) {
1777 log_error("Too many audit sockets passed.");
1778 return -EINVAL;
1779 }
1780
1781 s->audit_fd = fd;
1782
1783 } else {
1784
1785 if (!fds) {
1786 fds = fdset_new();
1787 if (!fds)
1788 return log_oom();
1789 }
1790
1791 r = fdset_put(fds, fd);
1792 if (r < 0)
1793 return log_oom();
1794 }
1795 }
1796
1797 /* Try to restore streams, but don't bother if this fails */
1798 (void) server_restore_streams(s, fds);
1799
1800 if (fdset_size(fds) > 0) {
1801 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1802 fds = fdset_free(fds);
1803 }
1804
1805 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1806
1807 /* always open stdout, syslog, native, and kmsg sockets */
1808
1809 /* systemd-journald.socket: /run/systemd/journal/stdout */
1810 r = server_open_stdout_socket(s);
1811 if (r < 0)
1812 return r;
1813
1814 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1815 r = server_open_syslog_socket(s);
1816 if (r < 0)
1817 return r;
1818
1819 /* systemd-journald.socket: /run/systemd/journal/socket */
1820 r = server_open_native_socket(s);
1821 if (r < 0)
1822 return r;
1823
1824 /* /dev/ksmg */
1825 r = server_open_dev_kmsg(s);
1826 if (r < 0)
1827 return r;
1828
1829 /* Unless we got *some* sockets and not audit, open audit socket */
1830 if (s->audit_fd >= 0 || no_sockets) {
1831 r = server_open_audit(s);
1832 if (r < 0)
1833 return r;
1834 }
1835
1836 r = server_open_kernel_seqnum(s);
1837 if (r < 0)
1838 return r;
1839
1840 r = server_open_hostname(s);
1841 if (r < 0)
1842 return r;
1843
1844 r = setup_signals(s);
1845 if (r < 0)
1846 return r;
1847
1848 s->udev = udev_new();
1849 if (!s->udev)
1850 return -ENOMEM;
1851
1852 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1853 if (!s->rate_limit)
1854 return -ENOMEM;
1855
1856 r = cg_get_root_path(&s->cgroup_root);
1857 if (r < 0)
1858 return r;
1859
1860 server_cache_hostname(s);
1861 server_cache_boot_id(s);
1862 server_cache_machine_id(s);
1863
1864 (void) server_connect_notify(s);
1865
1866 return system_journal_open(s, false);
1867 }
1868
1869 void server_maybe_append_tags(Server *s) {
1870 #ifdef HAVE_GCRYPT
1871 JournalFile *f;
1872 Iterator i;
1873 usec_t n;
1874
1875 n = now(CLOCK_REALTIME);
1876
1877 if (s->system_journal)
1878 journal_file_maybe_append_tag(s->system_journal, n);
1879
1880 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1881 journal_file_maybe_append_tag(f, n);
1882 #endif
1883 }
1884
1885 void server_done(Server *s) {
1886 JournalFile *f;
1887 assert(s);
1888
1889 while (s->stdout_streams)
1890 stdout_stream_free(s->stdout_streams);
1891
1892 if (s->system_journal)
1893 journal_file_close(s->system_journal);
1894
1895 if (s->runtime_journal)
1896 journal_file_close(s->runtime_journal);
1897
1898 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1899 journal_file_close(f);
1900
1901 ordered_hashmap_free(s->user_journals);
1902
1903 sd_event_source_unref(s->syslog_event_source);
1904 sd_event_source_unref(s->native_event_source);
1905 sd_event_source_unref(s->stdout_event_source);
1906 sd_event_source_unref(s->dev_kmsg_event_source);
1907 sd_event_source_unref(s->audit_event_source);
1908 sd_event_source_unref(s->sync_event_source);
1909 sd_event_source_unref(s->sigusr1_event_source);
1910 sd_event_source_unref(s->sigusr2_event_source);
1911 sd_event_source_unref(s->sigterm_event_source);
1912 sd_event_source_unref(s->sigint_event_source);
1913 sd_event_source_unref(s->sigrtmin1_event_source);
1914 sd_event_source_unref(s->hostname_event_source);
1915 sd_event_source_unref(s->notify_event_source);
1916 sd_event_source_unref(s->watchdog_event_source);
1917 sd_event_unref(s->event);
1918
1919 safe_close(s->syslog_fd);
1920 safe_close(s->native_fd);
1921 safe_close(s->stdout_fd);
1922 safe_close(s->dev_kmsg_fd);
1923 safe_close(s->audit_fd);
1924 safe_close(s->hostname_fd);
1925 safe_close(s->notify_fd);
1926
1927 if (s->rate_limit)
1928 journal_rate_limit_free(s->rate_limit);
1929
1930 if (s->kernel_seqnum)
1931 munmap(s->kernel_seqnum, sizeof(uint64_t));
1932
1933 free(s->buffer);
1934 free(s->tty_path);
1935 free(s->cgroup_root);
1936 free(s->hostname_field);
1937
1938 if (s->mmap)
1939 mmap_cache_unref(s->mmap);
1940
1941 udev_unref(s->udev);
1942 }
1943
1944 static const char* const storage_table[_STORAGE_MAX] = {
1945 [STORAGE_AUTO] = "auto",
1946 [STORAGE_VOLATILE] = "volatile",
1947 [STORAGE_PERSISTENT] = "persistent",
1948 [STORAGE_NONE] = "none"
1949 };
1950
1951 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1952 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1953
1954 static const char* const split_mode_table[_SPLIT_MAX] = {
1955 [SPLIT_LOGIN] = "login",
1956 [SPLIT_UID] = "uid",
1957 [SPLIT_NONE] = "none",
1958 };
1959
1960 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1961 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");