]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
Merge pull request #2543 from keszybz/build-sys-and-man-fixes
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "fileio.h"
45 #include "formats-util.h"
46 #include "fs-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "io-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "log.h"
75
76 #define USER_JOURNALS_MAX 1024
77
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
82
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
84
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
86
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
90 static int determine_space_for(
91 Server *s,
92 JournalMetrics *metrics,
93 const char *path,
94 const char *name,
95 bool verbose,
96 bool patch_min_use,
97 uint64_t *available,
98 uint64_t *limit) {
99
100 uint64_t sum = 0, ss_avail, avail;
101 _cleanup_closedir_ DIR *d = NULL;
102 struct dirent *de;
103 struct statvfs ss;
104 const char *p;
105 usec_t ts;
106
107 assert(s);
108 assert(metrics);
109 assert(path);
110 assert(name);
111
112 ts = now(CLOCK_MONOTONIC);
113
114 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
115
116 if (available)
117 *available = s->cached_space_available;
118 if (limit)
119 *limit = s->cached_space_limit;
120
121 return 0;
122 }
123
124 p = strjoina(path, SERVER_MACHINE_ID(s));
125 d = opendir(p);
126 if (!d)
127 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
128
129 if (fstatvfs(dirfd(d), &ss) < 0)
130 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
131
132 FOREACH_DIRENT_ALL(de, d, break) {
133 struct stat st;
134
135 if (!endswith(de->d_name, ".journal") &&
136 !endswith(de->d_name, ".journal~"))
137 continue;
138
139 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
140 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
141 continue;
142 }
143
144 if (!S_ISREG(st.st_mode))
145 continue;
146
147 sum += (uint64_t) st.st_blocks * 512UL;
148 }
149
150 /* If requested, then let's bump the min_use limit to the
151 * current usage on disk. We do this when starting up and
152 * first opening the journal files. This way sudden spikes in
153 * disk usage will not cause journald to vacuum files without
154 * bounds. Note that this means that only a restart of
155 * journald will make it reset this value. */
156
157 if (patch_min_use)
158 metrics->min_use = MAX(metrics->min_use, sum);
159
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = LESS_BY(ss_avail, metrics->keep_free);
162
163 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
164 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
165 s->cached_space_timestamp = ts;
166
167 if (verbose) {
168 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
169 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
170 format_bytes(fb1, sizeof(fb1), sum);
171 format_bytes(fb2, sizeof(fb2), metrics->max_use);
172 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
173 format_bytes(fb4, sizeof(fb4), ss_avail);
174 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
175 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
176
177 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
178 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
179 name, path, fb1, fb5, fb6),
180 "JOURNAL_NAME=%s", name,
181 "JOURNAL_PATH=%s", path,
182 "CURRENT_USE=%"PRIu64, sum,
183 "CURRENT_USE_PRETTY=%s", fb1,
184 "MAX_USE=%"PRIu64, metrics->max_use,
185 "MAX_USE_PRETTY=%s", fb2,
186 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
187 "DISK_KEEP_FREE_PRETTY=%s", fb3,
188 "DISK_AVAILABLE=%"PRIu64, ss_avail,
189 "DISK_AVAILABLE_PRETTY=%s", fb4,
190 "LIMIT=%"PRIu64, s->cached_space_limit,
191 "LIMIT_PRETTY=%s", fb5,
192 "AVAILABLE=%"PRIu64, s->cached_space_available,
193 "AVAILABLE_PRETTY=%s", fb6,
194 NULL);
195 }
196
197 if (available)
198 *available = s->cached_space_available;
199 if (limit)
200 *limit = s->cached_space_limit;
201
202 return 1;
203 }
204
205 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
206 JournalMetrics *metrics;
207 const char *path, *name;
208
209 assert(s);
210
211 if (s->system_journal) {
212 path = "/var/log/journal/";
213 metrics = &s->system_metrics;
214 name = "System journal";
215 } else {
216 path = "/run/log/journal/";
217 metrics = &s->runtime_metrics;
218 name = "Runtime journal";
219 }
220
221 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
222 }
223
224 static void server_add_acls(JournalFile *f, uid_t uid) {
225 #ifdef HAVE_ACL
226 int r;
227 #endif
228 assert(f);
229
230 #ifdef HAVE_ACL
231 if (uid <= SYSTEM_UID_MAX)
232 return;
233
234 r = add_acls_for_user(f->fd, uid);
235 if (r < 0)
236 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
237 #endif
238 }
239
240 static int open_journal(
241 Server *s,
242 bool reliably,
243 const char *fname,
244 int flags,
245 bool seal,
246 JournalMetrics *metrics,
247 JournalFile **ret) {
248 int r;
249 JournalFile *f;
250
251 assert(s);
252 assert(fname);
253 assert(ret);
254
255 if (reliably)
256 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, NULL, &f);
257 else
258 r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, NULL, &f);
259 if (r < 0)
260 return r;
261
262 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
263 if (r < 0) {
264 journal_file_close(f);
265 return r;
266 }
267
268 *ret = f;
269 return r;
270 }
271
272 static JournalFile* find_journal(Server *s, uid_t uid) {
273 _cleanup_free_ char *p = NULL;
274 int r;
275 JournalFile *f;
276 sd_id128_t machine;
277
278 assert(s);
279
280 /* We split up user logs only on /var, not on /run. If the
281 * runtime file is open, we write to it exclusively, in order
282 * to guarantee proper order as soon as we flush /run to
283 * /var and close the runtime file. */
284
285 if (s->runtime_journal)
286 return s->runtime_journal;
287
288 if (uid <= SYSTEM_UID_MAX)
289 return s->system_journal;
290
291 r = sd_id128_get_machine(&machine);
292 if (r < 0)
293 return s->system_journal;
294
295 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
296 if (f)
297 return f;
298
299 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
300 SD_ID128_FORMAT_VAL(machine), uid) < 0)
301 return s->system_journal;
302
303 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
304 /* Too many open? Then let's close one */
305 f = ordered_hashmap_steal_first(s->user_journals);
306 assert(f);
307 journal_file_close(f);
308 }
309
310 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
311 if (r < 0)
312 return s->system_journal;
313
314 server_add_acls(f, uid);
315
316 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
317 if (r < 0) {
318 journal_file_close(f);
319 return s->system_journal;
320 }
321
322 return f;
323 }
324
325 static int do_rotate(
326 Server *s,
327 JournalFile **f,
328 const char* name,
329 bool seal,
330 uint32_t uid) {
331
332 int r;
333 assert(s);
334
335 if (!*f)
336 return -EINVAL;
337
338 r = journal_file_rotate(f, s->compress, seal);
339 if (r < 0)
340 if (*f)
341 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
342 else
343 log_error_errno(r, "Failed to create new %s journal: %m", name);
344 else
345 server_add_acls(*f, uid);
346
347 return r;
348 }
349
350 void server_rotate(Server *s) {
351 JournalFile *f;
352 void *k;
353 Iterator i;
354 int r;
355
356 log_debug("Rotating...");
357
358 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
359 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
360
361 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
362 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
363 if (r >= 0)
364 ordered_hashmap_replace(s->user_journals, k, f);
365 else if (!f)
366 /* Old file has been closed and deallocated */
367 ordered_hashmap_remove(s->user_journals, k);
368 }
369 }
370
371 void server_sync(Server *s) {
372 JournalFile *f;
373 Iterator i;
374 int r;
375
376 if (s->system_journal) {
377 r = journal_file_set_offline(s->system_journal);
378 if (r < 0)
379 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
380 }
381
382 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
383 r = journal_file_set_offline(f);
384 if (r < 0)
385 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
386 }
387
388 if (s->sync_event_source) {
389 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
390 if (r < 0)
391 log_error_errno(r, "Failed to disable sync timer source: %m");
392 }
393
394 s->sync_scheduled = false;
395 }
396
397 static void do_vacuum(
398 Server *s,
399 JournalFile *f,
400 JournalMetrics *metrics,
401 const char *path,
402 const char *name,
403 bool verbose,
404 bool patch_min_use) {
405
406 const char *p;
407 uint64_t limit;
408 int r;
409
410 assert(s);
411 assert(metrics);
412 assert(path);
413 assert(name);
414
415 if (!f)
416 return;
417
418 p = strjoina(path, SERVER_MACHINE_ID(s));
419
420 limit = metrics->max_use;
421 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
422
423 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
424 if (r < 0 && r != -ENOENT)
425 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
426 }
427
428 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
429 assert(s);
430
431 log_debug("Vacuuming...");
432
433 s->oldest_file_usec = 0;
434
435 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
436 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
437
438 s->cached_space_limit = 0;
439 s->cached_space_available = 0;
440 s->cached_space_timestamp = 0;
441
442 return 0;
443 }
444
445 static void server_cache_machine_id(Server *s) {
446 sd_id128_t id;
447 int r;
448
449 assert(s);
450
451 r = sd_id128_get_machine(&id);
452 if (r < 0)
453 return;
454
455 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
456 }
457
458 static void server_cache_boot_id(Server *s) {
459 sd_id128_t id;
460 int r;
461
462 assert(s);
463
464 r = sd_id128_get_boot(&id);
465 if (r < 0)
466 return;
467
468 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
469 }
470
471 static void server_cache_hostname(Server *s) {
472 _cleanup_free_ char *t = NULL;
473 char *x;
474
475 assert(s);
476
477 t = gethostname_malloc();
478 if (!t)
479 return;
480
481 x = strappend("_HOSTNAME=", t);
482 if (!x)
483 return;
484
485 free(s->hostname_field);
486 s->hostname_field = x;
487 }
488
489 static bool shall_try_append_again(JournalFile *f, int r) {
490
491 /* -E2BIG Hit configured limit
492 -EFBIG Hit fs limit
493 -EDQUOT Quota limit hit
494 -ENOSPC Disk full
495 -EIO I/O error of some kind (mmap)
496 -EHOSTDOWN Other machine
497 -EBUSY Unclean shutdown
498 -EPROTONOSUPPORT Unsupported feature
499 -EBADMSG Corrupted
500 -ENODATA Truncated
501 -ESHUTDOWN Already archived
502 -EIDRM Journal file has been deleted */
503
504 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
505 log_debug("%s: Allocation limit reached, rotating.", f->path);
506 else if (r == -EHOSTDOWN)
507 log_info("%s: Journal file from other machine, rotating.", f->path);
508 else if (r == -EBUSY)
509 log_info("%s: Unclean shutdown, rotating.", f->path);
510 else if (r == -EPROTONOSUPPORT)
511 log_info("%s: Unsupported feature, rotating.", f->path);
512 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
513 log_warning("%s: Journal file corrupted, rotating.", f->path);
514 else if (r == -EIO)
515 log_warning("%s: IO error, rotating.", f->path);
516 else if (r == -EIDRM)
517 log_warning("%s: Journal file has been deleted, rotating.", f->path);
518 else
519 return false;
520
521 return true;
522 }
523
524 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
525 JournalFile *f;
526 bool vacuumed = false;
527 int r;
528
529 assert(s);
530 assert(iovec);
531 assert(n > 0);
532
533 f = find_journal(s, uid);
534 if (!f)
535 return;
536
537 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
538 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
539 server_rotate(s);
540 server_vacuum(s, false, false);
541 vacuumed = true;
542
543 f = find_journal(s, uid);
544 if (!f)
545 return;
546 }
547
548 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
549 if (r >= 0) {
550 server_schedule_sync(s, priority);
551 return;
552 }
553
554 if (vacuumed || !shall_try_append_again(f, r)) {
555 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
556 return;
557 }
558
559 server_rotate(s);
560 server_vacuum(s, false, false);
561
562 f = find_journal(s, uid);
563 if (!f)
564 return;
565
566 log_debug("Retrying write.");
567 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
568 if (r < 0)
569 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
570 else
571 server_schedule_sync(s, priority);
572 }
573
574 static void dispatch_message_real(
575 Server *s,
576 struct iovec *iovec, unsigned n, unsigned m,
577 const struct ucred *ucred,
578 const struct timeval *tv,
579 const char *label, size_t label_len,
580 const char *unit_id,
581 int priority,
582 pid_t object_pid) {
583
584 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
585 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
586 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
587 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
588 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
589 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
590 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
591 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
592 uid_t object_uid;
593 gid_t object_gid;
594 char *x;
595 int r;
596 char *t, *c;
597 uid_t realuid = 0, owner = 0, journal_uid;
598 bool owner_valid = false;
599 #ifdef HAVE_AUDIT
600 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
601 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
602 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
603 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
604
605 uint32_t audit;
606 uid_t loginuid;
607 #endif
608
609 assert(s);
610 assert(iovec);
611 assert(n > 0);
612 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
613
614 if (ucred) {
615 realuid = ucred->uid;
616
617 sprintf(pid, "_PID="PID_FMT, ucred->pid);
618 IOVEC_SET_STRING(iovec[n++], pid);
619
620 sprintf(uid, "_UID="UID_FMT, ucred->uid);
621 IOVEC_SET_STRING(iovec[n++], uid);
622
623 sprintf(gid, "_GID="GID_FMT, ucred->gid);
624 IOVEC_SET_STRING(iovec[n++], gid);
625
626 r = get_process_comm(ucred->pid, &t);
627 if (r >= 0) {
628 x = strjoina("_COMM=", t);
629 free(t);
630 IOVEC_SET_STRING(iovec[n++], x);
631 }
632
633 r = get_process_exe(ucred->pid, &t);
634 if (r >= 0) {
635 x = strjoina("_EXE=", t);
636 free(t);
637 IOVEC_SET_STRING(iovec[n++], x);
638 }
639
640 r = get_process_cmdline(ucred->pid, 0, false, &t);
641 if (r >= 0) {
642 x = strjoina("_CMDLINE=", t);
643 free(t);
644 IOVEC_SET_STRING(iovec[n++], x);
645 }
646
647 r = get_process_capeff(ucred->pid, &t);
648 if (r >= 0) {
649 x = strjoina("_CAP_EFFECTIVE=", t);
650 free(t);
651 IOVEC_SET_STRING(iovec[n++], x);
652 }
653
654 #ifdef HAVE_AUDIT
655 r = audit_session_from_pid(ucred->pid, &audit);
656 if (r >= 0) {
657 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
658 IOVEC_SET_STRING(iovec[n++], audit_session);
659 }
660
661 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
662 if (r >= 0) {
663 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
664 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
665 }
666 #endif
667
668 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
669 if (r >= 0) {
670 char *session = NULL;
671
672 x = strjoina("_SYSTEMD_CGROUP=", c);
673 IOVEC_SET_STRING(iovec[n++], x);
674
675 r = cg_path_get_session(c, &t);
676 if (r >= 0) {
677 session = strjoina("_SYSTEMD_SESSION=", t);
678 free(t);
679 IOVEC_SET_STRING(iovec[n++], session);
680 }
681
682 if (cg_path_get_owner_uid(c, &owner) >= 0) {
683 owner_valid = true;
684
685 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
686 IOVEC_SET_STRING(iovec[n++], owner_uid);
687 }
688
689 if (cg_path_get_unit(c, &t) >= 0) {
690 x = strjoina("_SYSTEMD_UNIT=", t);
691 free(t);
692 IOVEC_SET_STRING(iovec[n++], x);
693 } else if (unit_id && !session) {
694 x = strjoina("_SYSTEMD_UNIT=", unit_id);
695 IOVEC_SET_STRING(iovec[n++], x);
696 }
697
698 if (cg_path_get_user_unit(c, &t) >= 0) {
699 x = strjoina("_SYSTEMD_USER_UNIT=", t);
700 free(t);
701 IOVEC_SET_STRING(iovec[n++], x);
702 } else if (unit_id && session) {
703 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
704 IOVEC_SET_STRING(iovec[n++], x);
705 }
706
707 if (cg_path_get_slice(c, &t) >= 0) {
708 x = strjoina("_SYSTEMD_SLICE=", t);
709 free(t);
710 IOVEC_SET_STRING(iovec[n++], x);
711 }
712
713 free(c);
714 } else if (unit_id) {
715 x = strjoina("_SYSTEMD_UNIT=", unit_id);
716 IOVEC_SET_STRING(iovec[n++], x);
717 }
718
719 #ifdef HAVE_SELINUX
720 if (mac_selinux_have()) {
721 if (label) {
722 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
723
724 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
725 IOVEC_SET_STRING(iovec[n++], x);
726 } else {
727 security_context_t con;
728
729 if (getpidcon(ucred->pid, &con) >= 0) {
730 x = strjoina("_SELINUX_CONTEXT=", con);
731
732 freecon(con);
733 IOVEC_SET_STRING(iovec[n++], x);
734 }
735 }
736 }
737 #endif
738 }
739 assert(n <= m);
740
741 if (object_pid) {
742 r = get_process_uid(object_pid, &object_uid);
743 if (r >= 0) {
744 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
745 IOVEC_SET_STRING(iovec[n++], o_uid);
746 }
747
748 r = get_process_gid(object_pid, &object_gid);
749 if (r >= 0) {
750 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
751 IOVEC_SET_STRING(iovec[n++], o_gid);
752 }
753
754 r = get_process_comm(object_pid, &t);
755 if (r >= 0) {
756 x = strjoina("OBJECT_COMM=", t);
757 free(t);
758 IOVEC_SET_STRING(iovec[n++], x);
759 }
760
761 r = get_process_exe(object_pid, &t);
762 if (r >= 0) {
763 x = strjoina("OBJECT_EXE=", t);
764 free(t);
765 IOVEC_SET_STRING(iovec[n++], x);
766 }
767
768 r = get_process_cmdline(object_pid, 0, false, &t);
769 if (r >= 0) {
770 x = strjoina("OBJECT_CMDLINE=", t);
771 free(t);
772 IOVEC_SET_STRING(iovec[n++], x);
773 }
774
775 #ifdef HAVE_AUDIT
776 r = audit_session_from_pid(object_pid, &audit);
777 if (r >= 0) {
778 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
779 IOVEC_SET_STRING(iovec[n++], o_audit_session);
780 }
781
782 r = audit_loginuid_from_pid(object_pid, &loginuid);
783 if (r >= 0) {
784 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
785 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
786 }
787 #endif
788
789 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
790 if (r >= 0) {
791 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
792 IOVEC_SET_STRING(iovec[n++], x);
793
794 r = cg_path_get_session(c, &t);
795 if (r >= 0) {
796 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
797 free(t);
798 IOVEC_SET_STRING(iovec[n++], x);
799 }
800
801 if (cg_path_get_owner_uid(c, &owner) >= 0) {
802 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
803 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
804 }
805
806 if (cg_path_get_unit(c, &t) >= 0) {
807 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
808 free(t);
809 IOVEC_SET_STRING(iovec[n++], x);
810 }
811
812 if (cg_path_get_user_unit(c, &t) >= 0) {
813 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
814 free(t);
815 IOVEC_SET_STRING(iovec[n++], x);
816 }
817
818 free(c);
819 }
820 }
821 assert(n <= m);
822
823 if (tv) {
824 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
825 IOVEC_SET_STRING(iovec[n++], source_time);
826 }
827
828 /* Note that strictly speaking storing the boot id here is
829 * redundant since the entry includes this in-line
830 * anyway. However, we need this indexed, too. */
831 if (!isempty(s->boot_id_field))
832 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
833
834 if (!isempty(s->machine_id_field))
835 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
836
837 if (!isempty(s->hostname_field))
838 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
839
840 assert(n <= m);
841
842 if (s->split_mode == SPLIT_UID && realuid > 0)
843 /* Split up strictly by any UID */
844 journal_uid = realuid;
845 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
846 /* Split up by login UIDs. We do this only if the
847 * realuid is not root, in order not to accidentally
848 * leak privileged information to the user that is
849 * logged by a privileged process that is part of an
850 * unprivileged session. */
851 journal_uid = owner;
852 else
853 journal_uid = 0;
854
855 write_to_journal(s, journal_uid, iovec, n, priority);
856 }
857
858 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
859 char mid[11 + 32 + 1];
860 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
861 unsigned n = 0, m;
862 int r;
863 va_list ap;
864 struct ucred ucred = {};
865
866 assert(s);
867 assert(format);
868
869 assert_cc(3 == LOG_FAC(LOG_DAEMON));
870 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
871 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
872
873 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
874 assert_cc(6 == LOG_INFO);
875 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
876
877 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
878 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
879 IOVEC_SET_STRING(iovec[n++], mid);
880 }
881
882 m = n;
883
884 va_start(ap, format);
885 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
886 /* Error handling below */
887 va_end(ap);
888
889 ucred.pid = getpid();
890 ucred.uid = getuid();
891 ucred.gid = getgid();
892
893 if (r >= 0)
894 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
895
896 while (m < n)
897 free(iovec[m++].iov_base);
898
899 if (r < 0) {
900 /* We failed to format the message. Emit a warning instead. */
901 char buf[LINE_MAX];
902
903 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
904
905 n = 3;
906 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
907 IOVEC_SET_STRING(iovec[n++], buf);
908 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
909 }
910 }
911
912 void server_dispatch_message(
913 Server *s,
914 struct iovec *iovec, unsigned n, unsigned m,
915 const struct ucred *ucred,
916 const struct timeval *tv,
917 const char *label, size_t label_len,
918 const char *unit_id,
919 int priority,
920 pid_t object_pid) {
921
922 int rl, r;
923 _cleanup_free_ char *path = NULL;
924 uint64_t available = 0;
925 char *c;
926
927 assert(s);
928 assert(iovec || n == 0);
929
930 if (n == 0)
931 return;
932
933 if (LOG_PRI(priority) > s->max_level_store)
934 return;
935
936 /* Stop early in case the information will not be stored
937 * in a journal. */
938 if (s->storage == STORAGE_NONE)
939 return;
940
941 if (!ucred)
942 goto finish;
943
944 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
945 if (r < 0)
946 goto finish;
947
948 /* example: /user/lennart/3/foobar
949 * /system/dbus.service/foobar
950 *
951 * So let's cut of everything past the third /, since that is
952 * where user directories start */
953
954 c = strchr(path, '/');
955 if (c) {
956 c = strchr(c+1, '/');
957 if (c) {
958 c = strchr(c+1, '/');
959 if (c)
960 *c = 0;
961 }
962 }
963
964 (void) determine_space(s, false, false, &available, NULL);
965 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
966 if (rl == 0)
967 return;
968
969 /* Write a suppression message if we suppressed something */
970 if (rl > 1)
971 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
972 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
973 NULL);
974
975 finish:
976 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
977 }
978
979
980 static int system_journal_open(Server *s, bool flush_requested) {
981 const char *fn;
982 int r = 0;
983
984 if (!s->system_journal &&
985 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
986 (flush_requested
987 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
988
989 /* If in auto mode: first try to create the machine
990 * path, but not the prefix.
991 *
992 * If in persistent mode: create /var/log/journal and
993 * the machine path */
994
995 if (s->storage == STORAGE_PERSISTENT)
996 (void) mkdir_p("/var/log/journal/", 0755);
997
998 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
999 (void) mkdir(fn, 0755);
1000
1001 fn = strjoina(fn, "/system.journal");
1002 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
1003 if (r >= 0) {
1004 server_add_acls(s->system_journal, 0);
1005 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
1006 } else if (r < 0) {
1007 if (r != -ENOENT && r != -EROFS)
1008 log_warning_errno(r, "Failed to open system journal: %m");
1009
1010 r = 0;
1011 }
1012 }
1013
1014 if (!s->runtime_journal &&
1015 (s->storage != STORAGE_NONE)) {
1016
1017 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
1018
1019 if (s->system_journal) {
1020
1021 /* Try to open the runtime journal, but only
1022 * if it already exists, so that we can flush
1023 * it into the system journal */
1024
1025 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
1026 if (r < 0) {
1027 if (r != -ENOENT)
1028 log_warning_errno(r, "Failed to open runtime journal: %m");
1029
1030 r = 0;
1031 }
1032
1033 } else {
1034
1035 /* OK, we really need the runtime journal, so create
1036 * it if necessary. */
1037
1038 (void) mkdir("/run/log", 0755);
1039 (void) mkdir("/run/log/journal", 0755);
1040 (void) mkdir_parents(fn, 0750);
1041
1042 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
1043 if (r < 0)
1044 return log_error_errno(r, "Failed to open runtime journal: %m");
1045 }
1046
1047 if (s->runtime_journal) {
1048 server_add_acls(s->runtime_journal, 0);
1049 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1050 }
1051 }
1052
1053 return r;
1054 }
1055
1056 int server_flush_to_var(Server *s) {
1057 sd_id128_t machine;
1058 sd_journal *j = NULL;
1059 char ts[FORMAT_TIMESPAN_MAX];
1060 usec_t start;
1061 unsigned n = 0;
1062 int r;
1063
1064 assert(s);
1065
1066 if (s->storage != STORAGE_AUTO &&
1067 s->storage != STORAGE_PERSISTENT)
1068 return 0;
1069
1070 if (!s->runtime_journal)
1071 return 0;
1072
1073 (void) system_journal_open(s, true);
1074
1075 if (!s->system_journal)
1076 return 0;
1077
1078 log_debug("Flushing to /var...");
1079
1080 start = now(CLOCK_MONOTONIC);
1081
1082 r = sd_id128_get_machine(&machine);
1083 if (r < 0)
1084 return r;
1085
1086 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1087 if (r < 0)
1088 return log_error_errno(r, "Failed to read runtime journal: %m");
1089
1090 sd_journal_set_data_threshold(j, 0);
1091
1092 SD_JOURNAL_FOREACH(j) {
1093 Object *o = NULL;
1094 JournalFile *f;
1095
1096 f = j->current_file;
1097 assert(f && f->current_offset > 0);
1098
1099 n++;
1100
1101 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1102 if (r < 0) {
1103 log_error_errno(r, "Can't read entry: %m");
1104 goto finish;
1105 }
1106
1107 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1108 if (r >= 0)
1109 continue;
1110
1111 if (!shall_try_append_again(s->system_journal, r)) {
1112 log_error_errno(r, "Can't write entry: %m");
1113 goto finish;
1114 }
1115
1116 server_rotate(s);
1117 server_vacuum(s, false, false);
1118
1119 if (!s->system_journal) {
1120 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1121 r = -EIO;
1122 goto finish;
1123 }
1124
1125 log_debug("Retrying write.");
1126 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1127 if (r < 0) {
1128 log_error_errno(r, "Can't write entry: %m");
1129 goto finish;
1130 }
1131 }
1132
1133 r = 0;
1134
1135 finish:
1136 journal_file_post_change(s->system_journal);
1137
1138 s->runtime_journal = journal_file_close(s->runtime_journal);
1139
1140 if (r >= 0)
1141 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1142
1143 sd_journal_close(j);
1144
1145 server_driver_message(s, SD_ID128_NULL,
1146 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1147 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1148 n),
1149 NULL);
1150
1151 return r;
1152 }
1153
1154 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1155 Server *s = userdata;
1156 struct ucred *ucred = NULL;
1157 struct timeval *tv = NULL;
1158 struct cmsghdr *cmsg;
1159 char *label = NULL;
1160 size_t label_len = 0, m;
1161 struct iovec iovec;
1162 ssize_t n;
1163 int *fds = NULL, v = 0;
1164 unsigned n_fds = 0;
1165
1166 union {
1167 struct cmsghdr cmsghdr;
1168
1169 /* We use NAME_MAX space for the SELinux label
1170 * here. The kernel currently enforces no
1171 * limit, but according to suggestions from
1172 * the SELinux people this will change and it
1173 * will probably be identical to NAME_MAX. For
1174 * now we use that, but this should be updated
1175 * one day when the final limit is known. */
1176 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1177 CMSG_SPACE(sizeof(struct timeval)) +
1178 CMSG_SPACE(sizeof(int)) + /* fd */
1179 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1180 } control = {};
1181
1182 union sockaddr_union sa = {};
1183
1184 struct msghdr msghdr = {
1185 .msg_iov = &iovec,
1186 .msg_iovlen = 1,
1187 .msg_control = &control,
1188 .msg_controllen = sizeof(control),
1189 .msg_name = &sa,
1190 .msg_namelen = sizeof(sa),
1191 };
1192
1193 assert(s);
1194 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1195
1196 if (revents != EPOLLIN) {
1197 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1198 return -EIO;
1199 }
1200
1201 /* Try to get the right size, if we can. (Not all
1202 * sockets support SIOCINQ, hence we just try, but
1203 * don't rely on it. */
1204 (void) ioctl(fd, SIOCINQ, &v);
1205
1206 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1207 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1208 (size_t) LINE_MAX,
1209 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1210
1211 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1212 return log_oom();
1213
1214 iovec.iov_base = s->buffer;
1215 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1216
1217 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1218 if (n < 0) {
1219 if (errno == EINTR || errno == EAGAIN)
1220 return 0;
1221
1222 return log_error_errno(errno, "recvmsg() failed: %m");
1223 }
1224
1225 CMSG_FOREACH(cmsg, &msghdr) {
1226
1227 if (cmsg->cmsg_level == SOL_SOCKET &&
1228 cmsg->cmsg_type == SCM_CREDENTIALS &&
1229 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1230 ucred = (struct ucred*) CMSG_DATA(cmsg);
1231 else if (cmsg->cmsg_level == SOL_SOCKET &&
1232 cmsg->cmsg_type == SCM_SECURITY) {
1233 label = (char*) CMSG_DATA(cmsg);
1234 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1235 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1236 cmsg->cmsg_type == SO_TIMESTAMP &&
1237 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1238 tv = (struct timeval*) CMSG_DATA(cmsg);
1239 else if (cmsg->cmsg_level == SOL_SOCKET &&
1240 cmsg->cmsg_type == SCM_RIGHTS) {
1241 fds = (int*) CMSG_DATA(cmsg);
1242 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1243 }
1244 }
1245
1246 /* And a trailing NUL, just in case */
1247 s->buffer[n] = 0;
1248
1249 if (fd == s->syslog_fd) {
1250 if (n > 0 && n_fds == 0)
1251 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1252 else if (n_fds > 0)
1253 log_warning("Got file descriptors via syslog socket. Ignoring.");
1254
1255 } else if (fd == s->native_fd) {
1256 if (n > 0 && n_fds == 0)
1257 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1258 else if (n == 0 && n_fds == 1)
1259 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1260 else if (n_fds > 0)
1261 log_warning("Got too many file descriptors via native socket. Ignoring.");
1262
1263 } else {
1264 assert(fd == s->audit_fd);
1265
1266 if (n > 0 && n_fds == 0)
1267 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1268 else if (n_fds > 0)
1269 log_warning("Got file descriptors via audit socket. Ignoring.");
1270 }
1271
1272 close_many(fds, n_fds);
1273 return 0;
1274 }
1275
1276 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1277 Server *s = userdata;
1278 int r;
1279
1280 assert(s);
1281
1282 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1283
1284 server_flush_to_var(s);
1285 server_sync(s);
1286 server_vacuum(s, false, false);
1287
1288 r = touch("/run/systemd/journal/flushed");
1289 if (r < 0)
1290 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1291
1292 return 0;
1293 }
1294
1295 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1296 Server *s = userdata;
1297 int r;
1298
1299 assert(s);
1300
1301 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1302 server_rotate(s);
1303 server_vacuum(s, true, true);
1304
1305 /* Let clients know when the most recent rotation happened. */
1306 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1307 if (r < 0)
1308 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1309
1310 return 0;
1311 }
1312
1313 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1314 Server *s = userdata;
1315
1316 assert(s);
1317
1318 log_received_signal(LOG_INFO, si);
1319
1320 sd_event_exit(s->event, 0);
1321 return 0;
1322 }
1323
1324 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1325 Server *s = userdata;
1326 int r;
1327
1328 assert(s);
1329
1330 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1331
1332 server_sync(s);
1333
1334 /* Let clients know when the most recent sync happened. */
1335 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1336 if (r < 0)
1337 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1338
1339 return 0;
1340 }
1341
1342 static int setup_signals(Server *s) {
1343 int r;
1344
1345 assert(s);
1346
1347 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1348
1349 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1350 if (r < 0)
1351 return r;
1352
1353 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1354 if (r < 0)
1355 return r;
1356
1357 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1358 if (r < 0)
1359 return r;
1360
1361 /* Let's process SIGTERM late, so that we flush all queued
1362 * messages to disk before we exit */
1363 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1364 if (r < 0)
1365 return r;
1366
1367 /* When journald is invoked on the terminal (when debugging),
1368 * it's useful if C-c is handled equivalent to SIGTERM. */
1369 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1370 if (r < 0)
1371 return r;
1372
1373 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1374 if (r < 0)
1375 return r;
1376
1377 /* SIGRTMIN+1 causes an immediate sync. We process this very
1378 * late, so that everything else queued at this point is
1379 * really written to disk. Clients can watch
1380 * /run/systemd/journal/synced with inotify until its mtime
1381 * changes to see when a sync happened. */
1382 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1383 if (r < 0)
1384 return r;
1385
1386 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1387 if (r < 0)
1388 return r;
1389
1390 return 0;
1391 }
1392
1393 static int server_parse_proc_cmdline(Server *s) {
1394 _cleanup_free_ char *line = NULL;
1395 const char *p;
1396 int r;
1397
1398 r = proc_cmdline(&line);
1399 if (r < 0) {
1400 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1401 return 0;
1402 }
1403
1404 p = line;
1405 for(;;) {
1406 _cleanup_free_ char *word = NULL;
1407
1408 r = extract_first_word(&p, &word, NULL, 0);
1409 if (r < 0)
1410 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1411
1412 if (r == 0)
1413 break;
1414
1415 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1416 r = parse_boolean(word + 35);
1417 if (r < 0)
1418 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1419 else
1420 s->forward_to_syslog = r;
1421 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1422 r = parse_boolean(word + 33);
1423 if (r < 0)
1424 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1425 else
1426 s->forward_to_kmsg = r;
1427 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1428 r = parse_boolean(word + 36);
1429 if (r < 0)
1430 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1431 else
1432 s->forward_to_console = r;
1433 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1434 r = parse_boolean(word + 33);
1435 if (r < 0)
1436 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1437 else
1438 s->forward_to_wall = r;
1439 } else if (startswith(word, "systemd.journald"))
1440 log_warning("Invalid systemd.journald parameter. Ignoring.");
1441 }
1442
1443 /* do not warn about state here, since probably systemd already did */
1444 return 0;
1445 }
1446
1447 static int server_parse_config_file(Server *s) {
1448 assert(s);
1449
1450 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1451 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1452 "Journal\0",
1453 config_item_perf_lookup, journald_gperf_lookup,
1454 false, s);
1455 }
1456
1457 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1458 Server *s = userdata;
1459
1460 assert(s);
1461
1462 server_sync(s);
1463 return 0;
1464 }
1465
1466 int server_schedule_sync(Server *s, int priority) {
1467 int r;
1468
1469 assert(s);
1470
1471 if (priority <= LOG_CRIT) {
1472 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1473 server_sync(s);
1474 return 0;
1475 }
1476
1477 if (s->sync_scheduled)
1478 return 0;
1479
1480 if (s->sync_interval_usec > 0) {
1481 usec_t when;
1482
1483 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1484 if (r < 0)
1485 return r;
1486
1487 when += s->sync_interval_usec;
1488
1489 if (!s->sync_event_source) {
1490 r = sd_event_add_time(
1491 s->event,
1492 &s->sync_event_source,
1493 CLOCK_MONOTONIC,
1494 when, 0,
1495 server_dispatch_sync, s);
1496 if (r < 0)
1497 return r;
1498
1499 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1500 } else {
1501 r = sd_event_source_set_time(s->sync_event_source, when);
1502 if (r < 0)
1503 return r;
1504
1505 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1506 }
1507 if (r < 0)
1508 return r;
1509
1510 s->sync_scheduled = true;
1511 }
1512
1513 return 0;
1514 }
1515
1516 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1517 Server *s = userdata;
1518
1519 assert(s);
1520
1521 server_cache_hostname(s);
1522 return 0;
1523 }
1524
1525 static int server_open_hostname(Server *s) {
1526 int r;
1527
1528 assert(s);
1529
1530 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1531 if (s->hostname_fd < 0)
1532 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1533
1534 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1535 if (r < 0) {
1536 /* kernels prior to 3.2 don't support polling this file. Ignore
1537 * the failure. */
1538 if (r == -EPERM) {
1539 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1540 s->hostname_fd = safe_close(s->hostname_fd);
1541 return 0;
1542 }
1543
1544 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1545 }
1546
1547 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1548 if (r < 0)
1549 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1550
1551 return 0;
1552 }
1553
1554 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1555 Server *s = userdata;
1556 int r;
1557
1558 assert(s);
1559 assert(s->notify_event_source == es);
1560 assert(s->notify_fd == fd);
1561
1562 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1563 * message on it. Either it's the wtachdog event, the initial
1564 * READY=1 event or an stdout stream event. If there's nothing
1565 * to write anymore, turn our event source off. The next time
1566 * there's something to send it will be turned on again. */
1567
1568 if (!s->sent_notify_ready) {
1569 static const char p[] =
1570 "READY=1\n"
1571 "STATUS=Processing requests...";
1572 ssize_t l;
1573
1574 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1575 if (l < 0) {
1576 if (errno == EAGAIN)
1577 return 0;
1578
1579 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1580 }
1581
1582 s->sent_notify_ready = true;
1583 log_debug("Sent READY=1 notification.");
1584
1585 } else if (s->send_watchdog) {
1586
1587 static const char p[] =
1588 "WATCHDOG=1";
1589
1590 ssize_t l;
1591
1592 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1593 if (l < 0) {
1594 if (errno == EAGAIN)
1595 return 0;
1596
1597 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1598 }
1599
1600 s->send_watchdog = false;
1601 log_debug("Sent WATCHDOG=1 notification.");
1602
1603 } else if (s->stdout_streams_notify_queue)
1604 /* Dispatch one stream notification event */
1605 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1606
1607 /* Leave us enabled if there's still more to to do. */
1608 if (s->send_watchdog || s->stdout_streams_notify_queue)
1609 return 0;
1610
1611 /* There was nothing to do anymore, let's turn ourselves off. */
1612 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1613 if (r < 0)
1614 return log_error_errno(r, "Failed to turn off notify event source: %m");
1615
1616 return 0;
1617 }
1618
1619 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1620 Server *s = userdata;
1621 int r;
1622
1623 assert(s);
1624
1625 s->send_watchdog = true;
1626
1627 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1628 if (r < 0)
1629 log_warning_errno(r, "Failed to turn on notify event source: %m");
1630
1631 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1632 if (r < 0)
1633 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1634
1635 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1636 if (r < 0)
1637 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1638
1639 return 0;
1640 }
1641
1642 static int server_connect_notify(Server *s) {
1643 union sockaddr_union sa = {
1644 .un.sun_family = AF_UNIX,
1645 };
1646 const char *e;
1647 int r;
1648
1649 assert(s);
1650 assert(s->notify_fd < 0);
1651 assert(!s->notify_event_source);
1652
1653 /*
1654 So here's the problem: we'd like to send notification
1655 messages to PID 1, but we cannot do that via sd_notify(),
1656 since that's synchronous, and we might end up blocking on
1657 it. Specifically: given that PID 1 might block on
1658 dbus-daemon during IPC, and dbus-daemon is logging to us,
1659 and might hence block on us, we might end up in a deadlock
1660 if we block on sending PID 1 notification messages -- by
1661 generating a full blocking circle. To avoid this, let's
1662 create a non-blocking socket, and connect it to the
1663 notification socket, and then wait for POLLOUT before we
1664 send anything. This should efficiently avoid any deadlocks,
1665 as we'll never block on PID 1, hence PID 1 can safely block
1666 on dbus-daemon which can safely block on us again.
1667
1668 Don't think that this issue is real? It is, see:
1669 https://github.com/systemd/systemd/issues/1505
1670 */
1671
1672 e = getenv("NOTIFY_SOCKET");
1673 if (!e)
1674 return 0;
1675
1676 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1677 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1678 return -EINVAL;
1679 }
1680
1681 if (strlen(e) > sizeof(sa.un.sun_path)) {
1682 log_error("NOTIFY_SOCKET path too long: %s", e);
1683 return -EINVAL;
1684 }
1685
1686 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1687 if (s->notify_fd < 0)
1688 return log_error_errno(errno, "Failed to create notify socket: %m");
1689
1690 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1691
1692 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1693 if (sa.un.sun_path[0] == '@')
1694 sa.un.sun_path[0] = 0;
1695
1696 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1697 if (r < 0)
1698 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1699
1700 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1701 if (r < 0)
1702 return log_error_errno(r, "Failed to watch notification socket: %m");
1703
1704 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1705 s->send_watchdog = true;
1706
1707 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1708 if (r < 0)
1709 return log_error_errno(r, "Failed to add watchdog time event: %m");
1710 }
1711
1712 /* This should fire pretty soon, which we'll use to send the
1713 * READY=1 event. */
1714
1715 return 0;
1716 }
1717
1718 int server_init(Server *s) {
1719 _cleanup_fdset_free_ FDSet *fds = NULL;
1720 int n, r, fd;
1721 bool no_sockets;
1722
1723 assert(s);
1724
1725 zero(*s);
1726 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1727 s->compress = true;
1728 s->seal = true;
1729
1730 s->watchdog_usec = USEC_INFINITY;
1731
1732 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1733 s->sync_scheduled = false;
1734
1735 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1736 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1737
1738 s->forward_to_wall = true;
1739
1740 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1741
1742 s->max_level_store = LOG_DEBUG;
1743 s->max_level_syslog = LOG_DEBUG;
1744 s->max_level_kmsg = LOG_NOTICE;
1745 s->max_level_console = LOG_INFO;
1746 s->max_level_wall = LOG_EMERG;
1747
1748 journal_reset_metrics(&s->system_metrics);
1749 journal_reset_metrics(&s->runtime_metrics);
1750
1751 server_parse_config_file(s);
1752 server_parse_proc_cmdline(s);
1753
1754 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1755 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1756 s->rate_limit_interval, s->rate_limit_burst);
1757 s->rate_limit_interval = s->rate_limit_burst = 0;
1758 }
1759
1760 (void) mkdir_p("/run/systemd/journal", 0755);
1761
1762 s->user_journals = ordered_hashmap_new(NULL);
1763 if (!s->user_journals)
1764 return log_oom();
1765
1766 s->mmap = mmap_cache_new();
1767 if (!s->mmap)
1768 return log_oom();
1769
1770 r = sd_event_default(&s->event);
1771 if (r < 0)
1772 return log_error_errno(r, "Failed to create event loop: %m");
1773
1774 n = sd_listen_fds(true);
1775 if (n < 0)
1776 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1777
1778 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1779
1780 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1781
1782 if (s->native_fd >= 0) {
1783 log_error("Too many native sockets passed.");
1784 return -EINVAL;
1785 }
1786
1787 s->native_fd = fd;
1788
1789 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1790
1791 if (s->stdout_fd >= 0) {
1792 log_error("Too many stdout sockets passed.");
1793 return -EINVAL;
1794 }
1795
1796 s->stdout_fd = fd;
1797
1798 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1799 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1800
1801 if (s->syslog_fd >= 0) {
1802 log_error("Too many /dev/log sockets passed.");
1803 return -EINVAL;
1804 }
1805
1806 s->syslog_fd = fd;
1807
1808 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1809
1810 if (s->audit_fd >= 0) {
1811 log_error("Too many audit sockets passed.");
1812 return -EINVAL;
1813 }
1814
1815 s->audit_fd = fd;
1816
1817 } else {
1818
1819 if (!fds) {
1820 fds = fdset_new();
1821 if (!fds)
1822 return log_oom();
1823 }
1824
1825 r = fdset_put(fds, fd);
1826 if (r < 0)
1827 return log_oom();
1828 }
1829 }
1830
1831 /* Try to restore streams, but don't bother if this fails */
1832 (void) server_restore_streams(s, fds);
1833
1834 if (fdset_size(fds) > 0) {
1835 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1836 fds = fdset_free(fds);
1837 }
1838
1839 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1840
1841 /* always open stdout, syslog, native, and kmsg sockets */
1842
1843 /* systemd-journald.socket: /run/systemd/journal/stdout */
1844 r = server_open_stdout_socket(s);
1845 if (r < 0)
1846 return r;
1847
1848 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1849 r = server_open_syslog_socket(s);
1850 if (r < 0)
1851 return r;
1852
1853 /* systemd-journald.socket: /run/systemd/journal/socket */
1854 r = server_open_native_socket(s);
1855 if (r < 0)
1856 return r;
1857
1858 /* /dev/ksmg */
1859 r = server_open_dev_kmsg(s);
1860 if (r < 0)
1861 return r;
1862
1863 /* Unless we got *some* sockets and not audit, open audit socket */
1864 if (s->audit_fd >= 0 || no_sockets) {
1865 r = server_open_audit(s);
1866 if (r < 0)
1867 return r;
1868 }
1869
1870 r = server_open_kernel_seqnum(s);
1871 if (r < 0)
1872 return r;
1873
1874 r = server_open_hostname(s);
1875 if (r < 0)
1876 return r;
1877
1878 r = setup_signals(s);
1879 if (r < 0)
1880 return r;
1881
1882 s->udev = udev_new();
1883 if (!s->udev)
1884 return -ENOMEM;
1885
1886 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1887 if (!s->rate_limit)
1888 return -ENOMEM;
1889
1890 r = cg_get_root_path(&s->cgroup_root);
1891 if (r < 0)
1892 return r;
1893
1894 server_cache_hostname(s);
1895 server_cache_boot_id(s);
1896 server_cache_machine_id(s);
1897
1898 (void) server_connect_notify(s);
1899
1900 return system_journal_open(s, false);
1901 }
1902
1903 void server_maybe_append_tags(Server *s) {
1904 #ifdef HAVE_GCRYPT
1905 JournalFile *f;
1906 Iterator i;
1907 usec_t n;
1908
1909 n = now(CLOCK_REALTIME);
1910
1911 if (s->system_journal)
1912 journal_file_maybe_append_tag(s->system_journal, n);
1913
1914 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1915 journal_file_maybe_append_tag(f, n);
1916 #endif
1917 }
1918
1919 void server_done(Server *s) {
1920 JournalFile *f;
1921 assert(s);
1922
1923 while (s->stdout_streams)
1924 stdout_stream_free(s->stdout_streams);
1925
1926 if (s->system_journal)
1927 journal_file_close(s->system_journal);
1928
1929 if (s->runtime_journal)
1930 journal_file_close(s->runtime_journal);
1931
1932 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1933 journal_file_close(f);
1934
1935 ordered_hashmap_free(s->user_journals);
1936
1937 sd_event_source_unref(s->syslog_event_source);
1938 sd_event_source_unref(s->native_event_source);
1939 sd_event_source_unref(s->stdout_event_source);
1940 sd_event_source_unref(s->dev_kmsg_event_source);
1941 sd_event_source_unref(s->audit_event_source);
1942 sd_event_source_unref(s->sync_event_source);
1943 sd_event_source_unref(s->sigusr1_event_source);
1944 sd_event_source_unref(s->sigusr2_event_source);
1945 sd_event_source_unref(s->sigterm_event_source);
1946 sd_event_source_unref(s->sigint_event_source);
1947 sd_event_source_unref(s->sigrtmin1_event_source);
1948 sd_event_source_unref(s->hostname_event_source);
1949 sd_event_source_unref(s->notify_event_source);
1950 sd_event_source_unref(s->watchdog_event_source);
1951 sd_event_unref(s->event);
1952
1953 safe_close(s->syslog_fd);
1954 safe_close(s->native_fd);
1955 safe_close(s->stdout_fd);
1956 safe_close(s->dev_kmsg_fd);
1957 safe_close(s->audit_fd);
1958 safe_close(s->hostname_fd);
1959 safe_close(s->notify_fd);
1960
1961 if (s->rate_limit)
1962 journal_rate_limit_free(s->rate_limit);
1963
1964 if (s->kernel_seqnum)
1965 munmap(s->kernel_seqnum, sizeof(uint64_t));
1966
1967 free(s->buffer);
1968 free(s->tty_path);
1969 free(s->cgroup_root);
1970 free(s->hostname_field);
1971
1972 if (s->mmap)
1973 mmap_cache_unref(s->mmap);
1974
1975 udev_unref(s->udev);
1976 }
1977
1978 static const char* const storage_table[_STORAGE_MAX] = {
1979 [STORAGE_AUTO] = "auto",
1980 [STORAGE_VOLATILE] = "volatile",
1981 [STORAGE_PERSISTENT] = "persistent",
1982 [STORAGE_NONE] = "none"
1983 };
1984
1985 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1986 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1987
1988 static const char* const split_mode_table[_SPLIT_MAX] = {
1989 [SPLIT_LOGIN] = "login",
1990 [SPLIT_UID] = "uid",
1991 [SPLIT_NONE] = "none",
1992 };
1993
1994 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1995 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");