]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
tree-wide: fix wrong indent (#5757)
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #ifdef HAVE_SELINUX
21 #include <selinux/selinux.h>
22 #endif
23 #include <sys/ioctl.h>
24 #include <sys/mman.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
28
29 #include "libudev.h"
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33
34 #include "acl-util.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "hashmap.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "log.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
75
/* Upper bound on the number of per-user journal files kept open at the same time */
#define USER_JOURNALS_MAX 1024

/* Compile-time defaults */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* Cached disk space figures younger than this are reused instead of being measured again */
#define RECHECK_SPACE_USEC (30 * USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8 * 1024 * 1024)

/* The period to insert between posting changes, so that they get coalesced */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250 * USEC_PER_MSEC)
89
90 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
94
95 assert(ret_used);
96 assert(ret_free);
97
98 d = opendir(path);
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
101 errno, "Failed to open %s: %m", path);
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127 }
128
129 static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131 }
132
133 static int cache_space_refresh(Server *s, JournalStorage *storage) {
134 JournalStorageSpace *space;
135 JournalMetrics *metrics;
136 uint64_t vfs_used, vfs_avail, avail;
137 usec_t ts;
138 int r;
139
140 assert(s);
141
142 metrics = &storage->metrics;
143 space = &storage->space;
144
145 ts = now(CLOCK_MONOTONIC);
146
147 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
148 return 0;
149
150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
151 if (r < 0)
152 return r;
153
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
162 return 1;
163 }
164
165 static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175 }
176
177
178 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
179 JournalStorage *js;
180 int r;
181
182 assert(s);
183
184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
194 }
195
196 void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
206 if (cache_space_refresh(s, storage) < 0)
207 return;
208
209 metrics = &storage->metrics;
210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
217 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235 }
236
237 static void server_add_acls(JournalFile *f, uid_t uid) {
238 #ifdef HAVE_ACL
239 int r;
240 #endif
241 assert(f);
242
243 #ifdef HAVE_ACL
244 if (uid <= SYSTEM_UID_MAX)
245 return;
246
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
250 #endif
251 }
252
253 static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
260 JournalFile **ret) {
261 int r;
262 JournalFile *f;
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
270 else
271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
/* Tells whether the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        static const char flag_path[] = "/run/systemd/journal/flushed";

        return access(flag_path, F_OK) >= 0;
}
288
289 static int system_journal_open(Server *s, bool flush_requested) {
290 const char *fn;
291 int r = 0;
292
293 if (!s->system_journal &&
294 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
295 (flush_requested || flushed_flag_is_set())) {
296
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
299 *
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
302
303 if (s->storage == STORAGE_PERSISTENT)
304 (void) mkdir_p("/var/log/journal/", 0755);
305
306 (void) mkdir(s->system_storage.path, 0755);
307
308 fn = strjoina(s->system_storage.path, "/system.journal");
309 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
310 if (r >= 0) {
311 server_add_acls(s->system_journal, 0);
312 (void) cache_space_refresh(s, &s->system_storage);
313 patch_min_use(&s->system_storage);
314 } else if (r < 0) {
315 if (r != -ENOENT && r != -EROFS)
316 log_warning_errno(r, "Failed to open system journal: %m");
317
318 r = 0;
319 }
320
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
324 *
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
327 */
328 if (!flush_requested)
329 (void) server_flush_to_var(s, true);
330 }
331
332 if (!s->runtime_journal &&
333 (s->storage != STORAGE_NONE)) {
334
335 fn = strjoina(s->runtime_storage.path, "/system.journal");
336
337 if (s->system_journal) {
338
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
342
343 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
344 if (r < 0) {
345 if (r != -ENOENT)
346 log_warning_errno(r, "Failed to open runtime journal: %m");
347
348 r = 0;
349 }
350
351 } else {
352
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
355
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn, 0750);
359
360 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
361 if (r < 0)
362 return log_error_errno(r, "Failed to open runtime journal: %m");
363 }
364
365 if (s->runtime_journal) {
366 server_add_acls(s->runtime_journal, 0);
367 (void) cache_space_refresh(s, &s->runtime_storage);
368 patch_min_use(&s->runtime_storage);
369 }
370 }
371
372 return r;
373 }
374
375 static JournalFile* find_journal(Server *s, uid_t uid) {
376 _cleanup_free_ char *p = NULL;
377 int r;
378 JournalFile *f;
379 sd_id128_t machine;
380
381 assert(s);
382
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
386 *
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
390 *
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s, false);
393
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
398
399 if (s->runtime_journal)
400 return s->runtime_journal;
401
402 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
403 return s->system_journal;
404
405 r = sd_id128_get_machine(&machine);
406 if (r < 0)
407 return s->system_journal;
408
409 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
410 if (f)
411 return f;
412
413 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
414 SD_ID128_FORMAT_VAL(machine), uid) < 0)
415 return s->system_journal;
416
417 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
418 /* Too many open? Then let's close one */
419 f = ordered_hashmap_steal_first(s->user_journals);
420 assert(f);
421 (void) journal_file_close(f);
422 }
423
424 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
425 if (r < 0)
426 return s->system_journal;
427
428 server_add_acls(f, uid);
429
430 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
431 if (r < 0) {
432 (void) journal_file_close(f);
433 return s->system_journal;
434 }
435
436 return f;
437 }
438
439 static int do_rotate(
440 Server *s,
441 JournalFile **f,
442 const char* name,
443 bool seal,
444 uint32_t uid) {
445
446 int r;
447 assert(s);
448
449 if (!*f)
450 return -EINVAL;
451
452 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
453 if (r < 0)
454 if (*f)
455 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
456 else
457 log_error_errno(r, "Failed to create new %s journal: %m", name);
458 else
459 server_add_acls(*f, uid);
460
461 return r;
462 }
463
464 void server_rotate(Server *s) {
465 JournalFile *f;
466 void *k;
467 Iterator i;
468 int r;
469
470 log_debug("Rotating...");
471
472 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
473 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
474
475 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
476 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
477 if (r >= 0)
478 ordered_hashmap_replace(s->user_journals, k, f);
479 else if (!f)
480 /* Old file has been closed and deallocated */
481 ordered_hashmap_remove(s->user_journals, k);
482 }
483
484 /* Perform any deferred closes which aren't still offlining. */
485 SET_FOREACH(f, s->deferred_closes, i)
486 if (!journal_file_is_offlining(f)) {
487 (void) set_remove(s->deferred_closes, f);
488 (void) journal_file_close(f);
489 }
490 }
491
492 void server_sync(Server *s) {
493 JournalFile *f;
494 Iterator i;
495 int r;
496
497 if (s->system_journal) {
498 r = journal_file_set_offline(s->system_journal, false);
499 if (r < 0)
500 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
501 }
502
503 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
504 r = journal_file_set_offline(f, false);
505 if (r < 0)
506 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
507 }
508
509 if (s->sync_event_source) {
510 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
511 if (r < 0)
512 log_error_errno(r, "Failed to disable sync timer source: %m");
513 }
514
515 s->sync_scheduled = false;
516 }
517
518 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
519
520 int r;
521
522 assert(s);
523 assert(storage);
524
525 (void) cache_space_refresh(s, storage);
526
527 if (verbose)
528 server_space_usage_message(s, storage);
529
530 r = journal_directory_vacuum(storage->path, storage->space.limit,
531 storage->metrics.n_max_files, s->max_retention_usec,
532 &s->oldest_file_usec, verbose);
533 if (r < 0 && r != -ENOENT)
534 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
535
536 cache_space_invalidate(&storage->space);
537 }
538
539 int server_vacuum(Server *s, bool verbose) {
540 assert(s);
541
542 log_debug("Vacuuming...");
543
544 s->oldest_file_usec = 0;
545
546 if (s->system_journal)
547 do_vacuum(s, &s->system_storage, verbose);
548 if (s->runtime_journal)
549 do_vacuum(s, &s->runtime_storage, verbose);
550
551 return 0;
552 }
553
554 static void server_cache_machine_id(Server *s) {
555 sd_id128_t id;
556 int r;
557
558 assert(s);
559
560 r = sd_id128_get_machine(&id);
561 if (r < 0)
562 return;
563
564 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
565 }
566
567 static void server_cache_boot_id(Server *s) {
568 sd_id128_t id;
569 int r;
570
571 assert(s);
572
573 r = sd_id128_get_boot(&id);
574 if (r < 0)
575 return;
576
577 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
578 }
579
580 static void server_cache_hostname(Server *s) {
581 _cleanup_free_ char *t = NULL;
582 char *x;
583
584 assert(s);
585
586 t = gethostname_malloc();
587 if (!t)
588 return;
589
590 x = strappend("_HOSTNAME=", t);
591 if (!x)
592 return;
593
594 free(s->hostname_field);
595 s->hostname_field = x;
596 }
597
598 static bool shall_try_append_again(JournalFile *f, int r) {
599 switch(r) {
600
601 case -E2BIG: /* Hit configured limit */
602 case -EFBIG: /* Hit fs limit */
603 case -EDQUOT: /* Quota limit hit */
604 case -ENOSPC: /* Disk full */
605 log_debug("%s: Allocation limit reached, rotating.", f->path);
606 return true;
607
608 case -EIO: /* I/O error of some kind (mmap) */
609 log_warning("%s: IO error, rotating.", f->path);
610 return true;
611
612 case -EHOSTDOWN: /* Other machine */
613 log_info("%s: Journal file from other machine, rotating.", f->path);
614 return true;
615
616 case -EBUSY: /* Unclean shutdown */
617 log_info("%s: Unclean shutdown, rotating.", f->path);
618 return true;
619
620 case -EPROTONOSUPPORT: /* Unsupported feature */
621 log_info("%s: Unsupported feature, rotating.", f->path);
622 return true;
623
624 case -EBADMSG: /* Corrupted */
625 case -ENODATA: /* Truncated */
626 case -ESHUTDOWN: /* Already archived */
627 log_warning("%s: Journal file corrupted, rotating.", f->path);
628 return true;
629
630 case -EIDRM: /* Journal file has been deleted */
631 log_warning("%s: Journal file has been deleted, rotating.", f->path);
632 return true;
633
634 case -ETXTBSY: /* Journal file is from the future */
635 log_warning("%s: Journal file is from the future, rotating.", f->path);
636 return true;
637
638 default:
639 return false;
640 }
641 }
642
643 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
644 bool vacuumed = false, rotate = false;
645 struct dual_timestamp ts;
646 JournalFile *f;
647 int r;
648
649 assert(s);
650 assert(iovec);
651 assert(n > 0);
652
653 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
654 * the source time, and not even the time the event was originally seen, but instead simply the time we started
655 * processing it, as we want strictly linear ordering in what we write out.) */
656 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
657 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
658
659 if (ts.realtime < s->last_realtime_clock) {
660 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
661 * regular operation. However, when it does happen, then we should make sure that we start fresh files
662 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
663 * bisection works correctly. */
664
665 log_debug("Time jumped backwards, rotating.");
666 rotate = true;
667 } else {
668
669 f = find_journal(s, uid);
670 if (!f)
671 return;
672
673 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
674 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
675 rotate = true;
676 }
677 }
678
679 if (rotate) {
680 server_rotate(s);
681 server_vacuum(s, false);
682 vacuumed = true;
683
684 f = find_journal(s, uid);
685 if (!f)
686 return;
687 }
688
689 s->last_realtime_clock = ts.realtime;
690
691 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
692 if (r >= 0) {
693 server_schedule_sync(s, priority);
694 return;
695 }
696
697 if (vacuumed || !shall_try_append_again(f, r)) {
698 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
699 return;
700 }
701
702 server_rotate(s);
703 server_vacuum(s, false);
704
705 f = find_journal(s, uid);
706 if (!f)
707 return;
708
709 log_debug("Retrying write.");
710 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
711 if (r < 0)
712 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
713 else
714 server_schedule_sync(s, priority);
715 }
716
717 static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
718 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
719 char *copy, ids[SD_ID128_STRING_MAX];
720 int r;
721
722 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
723 * on the cgroup path. */
724
725 r = cg_slice_to_path(slice, &slice_path);
726 if (r < 0)
727 return r;
728
729 escaped = cg_escape(unit);
730 if (!escaped)
731 return -ENOMEM;
732
733 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
734 if (!p)
735 return -ENOMEM;
736
737 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
738 if (r < 0)
739 return r;
740 if (r != 32)
741 return -EINVAL;
742 ids[32] = 0;
743
744 if (!id128_is_valid(ids))
745 return -EINVAL;
746
747 copy = strdup(ids);
748 if (!copy)
749 return -ENOMEM;
750
751 *ret = copy;
752 return 0;
753 }
754
755 static void dispatch_message_real(
756 Server *s,
757 struct iovec *iovec, unsigned n, unsigned m,
758 const struct ucred *ucred,
759 const struct timeval *tv,
760 const char *label, size_t label_len,
761 const char *unit_id,
762 int priority,
763 pid_t object_pid,
764 char *cgroup) {
765
766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
776 char *x;
777 int r;
778 char *t, *c;
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
781 #ifdef HAVE_AUDIT
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
786
787 uint32_t audit;
788 uid_t loginuid;
789 #endif
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
795
796 if (ucred) {
797 realuid = ucred->uid;
798
799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
800 IOVEC_SET_STRING(iovec[n++], pid);
801
802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
803 IOVEC_SET_STRING(iovec[n++], uid);
804
805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
806 IOVEC_SET_STRING(iovec[n++], gid);
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
810 x = strjoina("_COMM=", t);
811 free(t);
812 IOVEC_SET_STRING(iovec[n++], x);
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
817 x = strjoina("_EXE=", t);
818 free(t);
819 IOVEC_SET_STRING(iovec[n++], x);
820 }
821
822 r = get_process_cmdline(ucred->pid, 0, false, &t);
823 if (r >= 0) {
824 x = strjoina("_CMDLINE=", t);
825 free(t);
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
831 x = strjoina("_CAP_EFFECTIVE=", t);
832 free(t);
833 IOVEC_SET_STRING(iovec[n++], x);
834 }
835
836 #ifdef HAVE_AUDIT
837 r = audit_session_from_pid(ucred->pid, &audit);
838 if (r >= 0) {
839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
844 if (r >= 0) {
845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
847 }
848 #endif
849
850 r = 0;
851 if (cgroup)
852 c = cgroup;
853 else
854 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
855
856 if (r >= 0) {
857 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
858 char *session = NULL;
859
860 x = strjoina("_SYSTEMD_CGROUP=", c);
861 IOVEC_SET_STRING(iovec[n++], x);
862
863 r = cg_path_get_session(c, &t);
864 if (r >= 0) {
865 session = strjoina("_SYSTEMD_SESSION=", t);
866 free(t);
867 IOVEC_SET_STRING(iovec[n++], session);
868 }
869
870 if (cg_path_get_owner_uid(c, &owner) >= 0) {
871 owner_valid = true;
872
873 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
874 IOVEC_SET_STRING(iovec[n++], owner_uid);
875 }
876
877 if (cg_path_get_unit(c, &raw_unit) >= 0) {
878 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
879 IOVEC_SET_STRING(iovec[n++], x);
880 } else if (unit_id && !session) {
881 x = strjoina("_SYSTEMD_UNIT=", unit_id);
882 IOVEC_SET_STRING(iovec[n++], x);
883 }
884
885 if (cg_path_get_user_unit(c, &t) >= 0) {
886 x = strjoina("_SYSTEMD_USER_UNIT=", t);
887 free(t);
888 IOVEC_SET_STRING(iovec[n++], x);
889 } else if (unit_id && session) {
890 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
893
894 if (cg_path_get_slice(c, &raw_slice) >= 0) {
895 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
896 IOVEC_SET_STRING(iovec[n++], x);
897 }
898
899 if (cg_path_get_user_slice(c, &t) >= 0) {
900 x = strjoina("_SYSTEMD_USER_SLICE=", t);
901 free(t);
902 IOVEC_SET_STRING(iovec[n++], x);
903 }
904
905 if (raw_slice && raw_unit) {
906 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
907 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
908 free(t);
909 IOVEC_SET_STRING(iovec[n++], x);
910 }
911 }
912
913 if (!cgroup)
914 free(c);
915 } else if (unit_id) {
916 x = strjoina("_SYSTEMD_UNIT=", unit_id);
917 IOVEC_SET_STRING(iovec[n++], x);
918 }
919
920 #ifdef HAVE_SELINUX
921 if (mac_selinux_have()) {
922 if (label) {
923 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
924
925 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
926 IOVEC_SET_STRING(iovec[n++], x);
927 } else {
928 char *con;
929
930 if (getpidcon(ucred->pid, &con) >= 0) {
931 x = strjoina("_SELINUX_CONTEXT=", con);
932
933 freecon(con);
934 IOVEC_SET_STRING(iovec[n++], x);
935 }
936 }
937 }
938 #endif
939 }
940 assert(n <= m);
941
942 if (object_pid) {
943 r = get_process_uid(object_pid, &object_uid);
944 if (r >= 0) {
945 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
946 IOVEC_SET_STRING(iovec[n++], o_uid);
947 }
948
949 r = get_process_gid(object_pid, &object_gid);
950 if (r >= 0) {
951 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
952 IOVEC_SET_STRING(iovec[n++], o_gid);
953 }
954
955 r = get_process_comm(object_pid, &t);
956 if (r >= 0) {
957 x = strjoina("OBJECT_COMM=", t);
958 free(t);
959 IOVEC_SET_STRING(iovec[n++], x);
960 }
961
962 r = get_process_exe(object_pid, &t);
963 if (r >= 0) {
964 x = strjoina("OBJECT_EXE=", t);
965 free(t);
966 IOVEC_SET_STRING(iovec[n++], x);
967 }
968
969 r = get_process_cmdline(object_pid, 0, false, &t);
970 if (r >= 0) {
971 x = strjoina("OBJECT_CMDLINE=", t);
972 free(t);
973 IOVEC_SET_STRING(iovec[n++], x);
974 }
975
976 #ifdef HAVE_AUDIT
977 r = audit_session_from_pid(object_pid, &audit);
978 if (r >= 0) {
979 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
980 IOVEC_SET_STRING(iovec[n++], o_audit_session);
981 }
982
983 r = audit_loginuid_from_pid(object_pid, &loginuid);
984 if (r >= 0) {
985 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
986 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
987 }
988 #endif
989
990 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
991 if (r >= 0) {
992 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
993 IOVEC_SET_STRING(iovec[n++], x);
994
995 r = cg_path_get_session(c, &t);
996 if (r >= 0) {
997 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
998 free(t);
999 IOVEC_SET_STRING(iovec[n++], x);
1000 }
1001
1002 if (cg_path_get_owner_uid(c, &owner) >= 0) {
1003 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
1004 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1005 }
1006
1007 if (cg_path_get_unit(c, &t) >= 0) {
1008 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
1009 free(t);
1010 IOVEC_SET_STRING(iovec[n++], x);
1011 }
1012
1013 if (cg_path_get_user_unit(c, &t) >= 0) {
1014 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
1015 free(t);
1016 IOVEC_SET_STRING(iovec[n++], x);
1017 }
1018
1019 if (cg_path_get_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
1025 if (cg_path_get_user_slice(c, &t) >= 0) {
1026 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1027 free(t);
1028 IOVEC_SET_STRING(iovec[n++], x);
1029 }
1030
1031 free(c);
1032 }
1033 }
1034 assert(n <= m);
1035
1036 if (tv) {
1037 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
1038 IOVEC_SET_STRING(iovec[n++], source_time);
1039 }
1040
1041 /* Note that strictly speaking storing the boot id here is
1042 * redundant since the entry includes this in-line
1043 * anyway. However, we need this indexed, too. */
1044 if (!isempty(s->boot_id_field))
1045 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
1046
1047 if (!isempty(s->machine_id_field))
1048 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
1049
1050 if (!isempty(s->hostname_field))
1051 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
1052
1053 assert(n <= m);
1054
1055 if (s->split_mode == SPLIT_UID && realuid > 0)
1056 /* Split up strictly by any UID */
1057 journal_uid = realuid;
1058 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
1059 /* Split up by login UIDs. We do this only if the
1060 * realuid is not root, in order not to accidentally
1061 * leak privileged information to the user that is
1062 * logged by a privileged process that is part of an
1063 * unprivileged session. */
1064 journal_uid = owner;
1065 else
1066 journal_uid = 0;
1067
1068 write_to_journal(s, journal_uid, iovec, n, priority);
1069 }
1070
1071 void server_driver_message(Server *s, const char *message_id, const char *format, ...) {
1072 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1073 unsigned n = 0, m;
1074 int r;
1075 va_list ap;
1076 struct ucred ucred = {};
1077
1078 assert(s);
1079 assert(format);
1080
1081 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1082 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1083 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1084
1085 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
1086 assert_cc(6 == LOG_INFO);
1087 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
1088
1089 if (message_id)
1090 IOVEC_SET_STRING(iovec[n++], message_id);
1091 m = n;
1092
1093 va_start(ap, format);
1094 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1095 /* Error handling below */
1096 va_end(ap);
1097
1098 ucred.pid = getpid();
1099 ucred.uid = getuid();
1100 ucred.gid = getgid();
1101
1102 if (r >= 0)
1103 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
1104
1105 while (m < n)
1106 free(iovec[m++].iov_base);
1107
1108 if (r < 0) {
1109 /* We failed to format the message. Emit a warning instead. */
1110 char buf[LINE_MAX];
1111
1112 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1113
1114 n = 3;
1115 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec[n++], buf);
1117 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
1118 }
1119 }
1120
1121 void server_dispatch_message(
1122 Server *s,
1123 struct iovec *iovec, unsigned n, unsigned m,
1124 const struct ucred *ucred,
1125 const struct timeval *tv,
1126 const char *label, size_t label_len,
1127 const char *unit_id,
1128 int priority,
1129 pid_t object_pid) {
1130
1131 int rl, r;
1132 _cleanup_free_ char *path = NULL;
1133 uint64_t available = 0;
1134 char *c = NULL;
1135
1136 assert(s);
1137 assert(iovec || n == 0);
1138
1139 if (n == 0)
1140 return;
1141
1142 if (LOG_PRI(priority) > s->max_level_store)
1143 return;
1144
1145 /* Stop early in case the information will not be stored
1146 * in a journal. */
1147 if (s->storage == STORAGE_NONE)
1148 return;
1149
1150 if (!ucred)
1151 goto finish;
1152
1153 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
1154 if (r < 0)
1155 goto finish;
1156
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1159 *
1160 * So let's cut of everything past the third /, since that is
1161 * where user directories start */
1162
1163 c = strchr(path, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c) {
1167 c = strchr(c+1, '/');
1168 if (c)
1169 *c = 0;
1170 }
1171 }
1172
1173 (void) determine_space(s, &available, NULL);
1174 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
1175 if (rl == 0)
1176 return;
1177
1178 /* Write a suppression message if we suppressed something */
1179 if (rl > 1)
1180 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1182 NULL);
1183
1184 finish:
1185 /* restore cgroup path for logging */
1186 if (c)
1187 *c = '/';
1188 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid, path);
1189 }
1190
1191 int server_flush_to_var(Server *s, bool require_flag_file) {
1192 sd_id128_t machine;
1193 sd_journal *j = NULL;
1194 char ts[FORMAT_TIMESPAN_MAX];
1195 usec_t start;
1196 unsigned n = 0;
1197 int r;
1198
1199 assert(s);
1200
1201 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1202 return 0;
1203
1204 if (!s->runtime_journal)
1205 return 0;
1206
1207 if (require_flag_file && !flushed_flag_is_set())
1208 return 0;
1209
1210 (void) system_journal_open(s, true);
1211
1212 if (!s->system_journal)
1213 return 0;
1214
1215 log_debug("Flushing to /var...");
1216
1217 start = now(CLOCK_MONOTONIC);
1218
1219 r = sd_id128_get_machine(&machine);
1220 if (r < 0)
1221 return r;
1222
1223 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1224 if (r < 0)
1225 return log_error_errno(r, "Failed to read runtime journal: %m");
1226
1227 sd_journal_set_data_threshold(j, 0);
1228
1229 SD_JOURNAL_FOREACH(j) {
1230 Object *o = NULL;
1231 JournalFile *f;
1232
1233 f = j->current_file;
1234 assert(f && f->current_offset > 0);
1235
1236 n++;
1237
1238 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1239 if (r < 0) {
1240 log_error_errno(r, "Can't read entry: %m");
1241 goto finish;
1242 }
1243
1244 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1245 if (r >= 0)
1246 continue;
1247
1248 if (!shall_try_append_again(s->system_journal, r)) {
1249 log_error_errno(r, "Can't write entry: %m");
1250 goto finish;
1251 }
1252
1253 server_rotate(s);
1254 server_vacuum(s, false);
1255
1256 if (!s->system_journal) {
1257 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1258 r = -EIO;
1259 goto finish;
1260 }
1261
1262 log_debug("Retrying write.");
1263 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1264 if (r < 0) {
1265 log_error_errno(r, "Can't write entry: %m");
1266 goto finish;
1267 }
1268 }
1269
1270 r = 0;
1271
1272 finish:
1273 journal_file_post_change(s->system_journal);
1274
1275 s->runtime_journal = journal_file_close(s->runtime_journal);
1276
1277 if (r >= 0)
1278 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1279
1280 sd_journal_close(j);
1281
1282 server_driver_message(s, NULL,
1283 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1284 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1285 n),
1286 NULL);
1287
1288 return r;
1289 }
1290
1291 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1292 Server *s = userdata;
1293 struct ucred *ucred = NULL;
1294 struct timeval *tv = NULL;
1295 struct cmsghdr *cmsg;
1296 char *label = NULL;
1297 size_t label_len = 0, m;
1298 struct iovec iovec;
1299 ssize_t n;
1300 int *fds = NULL, v = 0;
1301 unsigned n_fds = 0;
1302
1303 union {
1304 struct cmsghdr cmsghdr;
1305
1306 /* We use NAME_MAX space for the SELinux label
1307 * here. The kernel currently enforces no
1308 * limit, but according to suggestions from
1309 * the SELinux people this will change and it
1310 * will probably be identical to NAME_MAX. For
1311 * now we use that, but this should be updated
1312 * one day when the final limit is known. */
1313 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1314 CMSG_SPACE(sizeof(struct timeval)) +
1315 CMSG_SPACE(sizeof(int)) + /* fd */
1316 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1317 } control = {};
1318
1319 union sockaddr_union sa = {};
1320
1321 struct msghdr msghdr = {
1322 .msg_iov = &iovec,
1323 .msg_iovlen = 1,
1324 .msg_control = &control,
1325 .msg_controllen = sizeof(control),
1326 .msg_name = &sa,
1327 .msg_namelen = sizeof(sa),
1328 };
1329
1330 assert(s);
1331 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1332
1333 if (revents != EPOLLIN) {
1334 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1335 return -EIO;
1336 }
1337
1338 /* Try to get the right size, if we can. (Not all
1339 * sockets support SIOCINQ, hence we just try, but
1340 * don't rely on it. */
1341 (void) ioctl(fd, SIOCINQ, &v);
1342
1343 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1344 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1345 (size_t) LINE_MAX,
1346 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1347
1348 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1349 return log_oom();
1350
1351 iovec.iov_base = s->buffer;
1352 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1353
1354 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1355 if (n < 0) {
1356 if (errno == EINTR || errno == EAGAIN)
1357 return 0;
1358
1359 return log_error_errno(errno, "recvmsg() failed: %m");
1360 }
1361
1362 CMSG_FOREACH(cmsg, &msghdr) {
1363
1364 if (cmsg->cmsg_level == SOL_SOCKET &&
1365 cmsg->cmsg_type == SCM_CREDENTIALS &&
1366 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1367 ucred = (struct ucred*) CMSG_DATA(cmsg);
1368 else if (cmsg->cmsg_level == SOL_SOCKET &&
1369 cmsg->cmsg_type == SCM_SECURITY) {
1370 label = (char*) CMSG_DATA(cmsg);
1371 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1372 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1373 cmsg->cmsg_type == SO_TIMESTAMP &&
1374 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1375 tv = (struct timeval*) CMSG_DATA(cmsg);
1376 else if (cmsg->cmsg_level == SOL_SOCKET &&
1377 cmsg->cmsg_type == SCM_RIGHTS) {
1378 fds = (int*) CMSG_DATA(cmsg);
1379 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1380 }
1381 }
1382
1383 /* And a trailing NUL, just in case */
1384 s->buffer[n] = 0;
1385
1386 if (fd == s->syslog_fd) {
1387 if (n > 0 && n_fds == 0)
1388 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1389 else if (n_fds > 0)
1390 log_warning("Got file descriptors via syslog socket. Ignoring.");
1391
1392 } else if (fd == s->native_fd) {
1393 if (n > 0 && n_fds == 0)
1394 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1395 else if (n == 0 && n_fds == 1)
1396 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1397 else if (n_fds > 0)
1398 log_warning("Got too many file descriptors via native socket. Ignoring.");
1399
1400 } else {
1401 assert(fd == s->audit_fd);
1402
1403 if (n > 0 && n_fds == 0)
1404 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1405 else if (n_fds > 0)
1406 log_warning("Got file descriptors via audit socket. Ignoring.");
1407 }
1408
1409 close_many(fds, n_fds);
1410 return 0;
1411 }
1412
1413 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1414 Server *s = userdata;
1415 int r;
1416
1417 assert(s);
1418
1419 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1420
1421 (void) server_flush_to_var(s, false);
1422 server_sync(s);
1423 server_vacuum(s, false);
1424
1425 r = touch("/run/systemd/journal/flushed");
1426 if (r < 0)
1427 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1428
1429 server_space_usage_message(s, NULL);
1430 return 0;
1431 }
1432
1433 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1434 Server *s = userdata;
1435 int r;
1436
1437 assert(s);
1438
1439 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1440 server_rotate(s);
1441 server_vacuum(s, true);
1442
1443 if (s->system_journal)
1444 patch_min_use(&s->system_storage);
1445 if (s->runtime_journal)
1446 patch_min_use(&s->runtime_storage);
1447
1448 /* Let clients know when the most recent rotation happened. */
1449 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1450 if (r < 0)
1451 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1452
1453 return 0;
1454 }
1455
1456 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1457 Server *s = userdata;
1458
1459 assert(s);
1460
1461 log_received_signal(LOG_INFO, si);
1462
1463 sd_event_exit(s->event, 0);
1464 return 0;
1465 }
1466
1467 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1468 Server *s = userdata;
1469 int r;
1470
1471 assert(s);
1472
1473 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1474
1475 server_sync(s);
1476
1477 /* Let clients know when the most recent sync happened. */
1478 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1479 if (r < 0)
1480 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1481
1482 return 0;
1483 }
1484
1485 static int setup_signals(Server *s) {
1486 int r;
1487
1488 assert(s);
1489
1490 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1491
1492 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1493 if (r < 0)
1494 return r;
1495
1496 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1497 if (r < 0)
1498 return r;
1499
1500 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1501 if (r < 0)
1502 return r;
1503
1504 /* Let's process SIGTERM late, so that we flush all queued
1505 * messages to disk before we exit */
1506 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1507 if (r < 0)
1508 return r;
1509
1510 /* When journald is invoked on the terminal (when debugging),
1511 * it's useful if C-c is handled equivalent to SIGTERM. */
1512 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1513 if (r < 0)
1514 return r;
1515
1516 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1517 if (r < 0)
1518 return r;
1519
1520 /* SIGRTMIN+1 causes an immediate sync. We process this very
1521 * late, so that everything else queued at this point is
1522 * really written to disk. Clients can watch
1523 * /run/systemd/journal/synced with inotify until its mtime
1524 * changes to see when a sync happened. */
1525 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1526 if (r < 0)
1527 return r;
1528
1529 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1530 if (r < 0)
1531 return r;
1532
1533 return 0;
1534 }
1535
1536 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1537 Server *s = data;
1538 int r;
1539
1540 assert(s);
1541
1542 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1543
1544 r = value ? parse_boolean(value) : true;
1545 if (r < 0)
1546 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1547 else
1548 s->forward_to_syslog = r;
1549
1550 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1551
1552 r = value ? parse_boolean(value) : true;
1553 if (r < 0)
1554 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1555 else
1556 s->forward_to_kmsg = r;
1557
1558 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1559
1560 r = value ? parse_boolean(value) : true;
1561 if (r < 0)
1562 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1563 else
1564 s->forward_to_console = r;
1565
1566 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1567
1568 r = value ? parse_boolean(value) : true;
1569 if (r < 0)
1570 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1571 else
1572 s->forward_to_wall = r;
1573
1574 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1575
1576 if (proc_cmdline_value_missing(key, value))
1577 return 0;
1578
1579 r = log_level_from_string(value);
1580 if (r < 0)
1581 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1582 else
1583 s->max_level_console = r;
1584
1585 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1586
1587 if (proc_cmdline_value_missing(key, value))
1588 return 0;
1589
1590 r = log_level_from_string(value);
1591 if (r < 0)
1592 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1593 else
1594 s->max_level_store = r;
1595
1596 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1597
1598 if (proc_cmdline_value_missing(key, value))
1599 return 0;
1600
1601 r = log_level_from_string(value);
1602 if (r < 0)
1603 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1604 else
1605 s->max_level_syslog = r;
1606
1607 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1608
1609 if (proc_cmdline_value_missing(key, value))
1610 return 0;
1611
1612 r = log_level_from_string(value);
1613 if (r < 0)
1614 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1615 else
1616 s->max_level_kmsg = r;
1617
1618 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1619
1620 if (proc_cmdline_value_missing(key, value))
1621 return 0;
1622
1623 r = log_level_from_string(value);
1624 if (r < 0)
1625 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1626 else
1627 s->max_level_wall = r;
1628
1629 } else if (startswith(key, "systemd.journald"))
1630 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1631
1632 /* do not warn about state here, since probably systemd already did */
1633 return 0;
1634 }
1635
1636 static int server_parse_config_file(Server *s) {
1637 assert(s);
1638
1639 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1640 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1641 "Journal\0",
1642 config_item_perf_lookup, journald_gperf_lookup,
1643 false, s);
1644 }
1645
1646 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1647 Server *s = userdata;
1648
1649 assert(s);
1650
1651 server_sync(s);
1652 return 0;
1653 }
1654
1655 int server_schedule_sync(Server *s, int priority) {
1656 int r;
1657
1658 assert(s);
1659
1660 if (priority <= LOG_CRIT) {
1661 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1662 server_sync(s);
1663 return 0;
1664 }
1665
1666 if (s->sync_scheduled)
1667 return 0;
1668
1669 if (s->sync_interval_usec > 0) {
1670 usec_t when;
1671
1672 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1673 if (r < 0)
1674 return r;
1675
1676 when += s->sync_interval_usec;
1677
1678 if (!s->sync_event_source) {
1679 r = sd_event_add_time(
1680 s->event,
1681 &s->sync_event_source,
1682 CLOCK_MONOTONIC,
1683 when, 0,
1684 server_dispatch_sync, s);
1685 if (r < 0)
1686 return r;
1687
1688 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1689 } else {
1690 r = sd_event_source_set_time(s->sync_event_source, when);
1691 if (r < 0)
1692 return r;
1693
1694 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1695 }
1696 if (r < 0)
1697 return r;
1698
1699 s->sync_scheduled = true;
1700 }
1701
1702 return 0;
1703 }
1704
1705 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1706 Server *s = userdata;
1707
1708 assert(s);
1709
1710 server_cache_hostname(s);
1711 return 0;
1712 }
1713
1714 static int server_open_hostname(Server *s) {
1715 int r;
1716
1717 assert(s);
1718
1719 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1720 if (s->hostname_fd < 0)
1721 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1722
1723 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1724 if (r < 0) {
1725 /* kernels prior to 3.2 don't support polling this file. Ignore
1726 * the failure. */
1727 if (r == -EPERM) {
1728 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1729 s->hostname_fd = safe_close(s->hostname_fd);
1730 return 0;
1731 }
1732
1733 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1734 }
1735
1736 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1737 if (r < 0)
1738 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1739
1740 return 0;
1741 }
1742
1743 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1744 Server *s = userdata;
1745 int r;
1746
1747 assert(s);
1748 assert(s->notify_event_source == es);
1749 assert(s->notify_fd == fd);
1750
1751 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1752 * message on it. Either it's the watchdog event, the initial
1753 * READY=1 event or an stdout stream event. If there's nothing
1754 * to write anymore, turn our event source off. The next time
1755 * there's something to send it will be turned on again. */
1756
1757 if (!s->sent_notify_ready) {
1758 static const char p[] =
1759 "READY=1\n"
1760 "STATUS=Processing requests...";
1761 ssize_t l;
1762
1763 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1764 if (l < 0) {
1765 if (errno == EAGAIN)
1766 return 0;
1767
1768 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1769 }
1770
1771 s->sent_notify_ready = true;
1772 log_debug("Sent READY=1 notification.");
1773
1774 } else if (s->send_watchdog) {
1775
1776 static const char p[] =
1777 "WATCHDOG=1";
1778
1779 ssize_t l;
1780
1781 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1782 if (l < 0) {
1783 if (errno == EAGAIN)
1784 return 0;
1785
1786 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1787 }
1788
1789 s->send_watchdog = false;
1790 log_debug("Sent WATCHDOG=1 notification.");
1791
1792 } else if (s->stdout_streams_notify_queue)
1793 /* Dispatch one stream notification event */
1794 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1795
1796 /* Leave us enabled if there's still more to do. */
1797 if (s->send_watchdog || s->stdout_streams_notify_queue)
1798 return 0;
1799
1800 /* There was nothing to do anymore, let's turn ourselves off. */
1801 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1802 if (r < 0)
1803 return log_error_errno(r, "Failed to turn off notify event source: %m");
1804
1805 return 0;
1806 }
1807
1808 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1809 Server *s = userdata;
1810 int r;
1811
1812 assert(s);
1813
1814 s->send_watchdog = true;
1815
1816 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1817 if (r < 0)
1818 log_warning_errno(r, "Failed to turn on notify event source: %m");
1819
1820 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1821 if (r < 0)
1822 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1823
1824 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1825 if (r < 0)
1826 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1827
1828 return 0;
1829 }
1830
1831 static int server_connect_notify(Server *s) {
1832 union sockaddr_union sa = {
1833 .un.sun_family = AF_UNIX,
1834 };
1835 const char *e;
1836 int r;
1837
1838 assert(s);
1839 assert(s->notify_fd < 0);
1840 assert(!s->notify_event_source);
1841
1842 /*
1843 So here's the problem: we'd like to send notification
1844 messages to PID 1, but we cannot do that via sd_notify(),
1845 since that's synchronous, and we might end up blocking on
1846 it. Specifically: given that PID 1 might block on
1847 dbus-daemon during IPC, and dbus-daemon is logging to us,
1848 and might hence block on us, we might end up in a deadlock
1849 if we block on sending PID 1 notification messages — by
1850 generating a full blocking circle. To avoid this, let's
1851 create a non-blocking socket, and connect it to the
1852 notification socket, and then wait for POLLOUT before we
1853 send anything. This should efficiently avoid any deadlocks,
1854 as we'll never block on PID 1, hence PID 1 can safely block
1855 on dbus-daemon which can safely block on us again.
1856
1857 Don't think that this issue is real? It is, see:
1858 https://github.com/systemd/systemd/issues/1505
1859 */
1860
1861 e = getenv("NOTIFY_SOCKET");
1862 if (!e)
1863 return 0;
1864
1865 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1866 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1867 return -EINVAL;
1868 }
1869
1870 if (strlen(e) > sizeof(sa.un.sun_path)) {
1871 log_error("NOTIFY_SOCKET path too long: %s", e);
1872 return -EINVAL;
1873 }
1874
1875 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1876 if (s->notify_fd < 0)
1877 return log_error_errno(errno, "Failed to create notify socket: %m");
1878
1879 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1880
1881 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1882 if (sa.un.sun_path[0] == '@')
1883 sa.un.sun_path[0] = 0;
1884
1885 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1886 if (r < 0)
1887 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1888
1889 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1890 if (r < 0)
1891 return log_error_errno(r, "Failed to watch notification socket: %m");
1892
1893 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1894 s->send_watchdog = true;
1895
1896 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1897 if (r < 0)
1898 return log_error_errno(r, "Failed to add watchdog time event: %m");
1899 }
1900
1901 /* This should fire pretty soon, which we'll use to send the
1902 * READY=1 event. */
1903
1904 return 0;
1905 }
1906
1907 int server_init(Server *s) {
1908 _cleanup_fdset_free_ FDSet *fds = NULL;
1909 int n, r, fd;
1910 bool no_sockets;
1911
1912 assert(s);
1913
1914 zero(*s);
1915 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1916 s->compress = true;
1917 s->seal = true;
1918
1919 s->watchdog_usec = USEC_INFINITY;
1920
1921 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1922 s->sync_scheduled = false;
1923
1924 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1925 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1926
1927 s->forward_to_wall = true;
1928
1929 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1930
1931 s->max_level_store = LOG_DEBUG;
1932 s->max_level_syslog = LOG_DEBUG;
1933 s->max_level_kmsg = LOG_NOTICE;
1934 s->max_level_console = LOG_INFO;
1935 s->max_level_wall = LOG_EMERG;
1936
1937 journal_reset_metrics(&s->system_storage.metrics);
1938 journal_reset_metrics(&s->runtime_storage.metrics);
1939
1940 server_parse_config_file(s);
1941
1942 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1943 if (r < 0)
1944 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1945
1946 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1947 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1948 s->rate_limit_interval, s->rate_limit_burst);
1949 s->rate_limit_interval = s->rate_limit_burst = 0;
1950 }
1951
1952 (void) mkdir_p("/run/systemd/journal", 0755);
1953
1954 s->user_journals = ordered_hashmap_new(NULL);
1955 if (!s->user_journals)
1956 return log_oom();
1957
1958 s->mmap = mmap_cache_new();
1959 if (!s->mmap)
1960 return log_oom();
1961
1962 s->deferred_closes = set_new(NULL);
1963 if (!s->deferred_closes)
1964 return log_oom();
1965
1966 r = sd_event_default(&s->event);
1967 if (r < 0)
1968 return log_error_errno(r, "Failed to create event loop: %m");
1969
1970 n = sd_listen_fds(true);
1971 if (n < 0)
1972 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1973
1974 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1975
1976 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1977
1978 if (s->native_fd >= 0) {
1979 log_error("Too many native sockets passed.");
1980 return -EINVAL;
1981 }
1982
1983 s->native_fd = fd;
1984
1985 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1986
1987 if (s->stdout_fd >= 0) {
1988 log_error("Too many stdout sockets passed.");
1989 return -EINVAL;
1990 }
1991
1992 s->stdout_fd = fd;
1993
1994 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1995 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1996
1997 if (s->syslog_fd >= 0) {
1998 log_error("Too many /dev/log sockets passed.");
1999 return -EINVAL;
2000 }
2001
2002 s->syslog_fd = fd;
2003
2004 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
2005
2006 if (s->audit_fd >= 0) {
2007 log_error("Too many audit sockets passed.");
2008 return -EINVAL;
2009 }
2010
2011 s->audit_fd = fd;
2012
2013 } else {
2014
2015 if (!fds) {
2016 fds = fdset_new();
2017 if (!fds)
2018 return log_oom();
2019 }
2020
2021 r = fdset_put(fds, fd);
2022 if (r < 0)
2023 return log_oom();
2024 }
2025 }
2026
2027 /* Try to restore streams, but don't bother if this fails */
2028 (void) server_restore_streams(s, fds);
2029
2030 if (fdset_size(fds) > 0) {
2031 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
2032 fds = fdset_free(fds);
2033 }
2034
2035 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
2036
2037 /* always open stdout, syslog, native, and kmsg sockets */
2038
2039 /* systemd-journald.socket: /run/systemd/journal/stdout */
2040 r = server_open_stdout_socket(s);
2041 if (r < 0)
2042 return r;
2043
2044 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2045 r = server_open_syslog_socket(s);
2046 if (r < 0)
2047 return r;
2048
2049 /* systemd-journald.socket: /run/systemd/journal/socket */
2050 r = server_open_native_socket(s);
2051 if (r < 0)
2052 return r;
2053
2054 /* /dev/ksmg */
2055 r = server_open_dev_kmsg(s);
2056 if (r < 0)
2057 return r;
2058
2059 /* Unless we got *some* sockets and not audit, open audit socket */
2060 if (s->audit_fd >= 0 || no_sockets) {
2061 r = server_open_audit(s);
2062 if (r < 0)
2063 return r;
2064 }
2065
2066 r = server_open_kernel_seqnum(s);
2067 if (r < 0)
2068 return r;
2069
2070 r = server_open_hostname(s);
2071 if (r < 0)
2072 return r;
2073
2074 r = setup_signals(s);
2075 if (r < 0)
2076 return r;
2077
2078 s->udev = udev_new();
2079 if (!s->udev)
2080 return -ENOMEM;
2081
2082 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2083 if (!s->rate_limit)
2084 return -ENOMEM;
2085
2086 r = cg_get_root_path(&s->cgroup_root);
2087 if (r < 0)
2088 return r;
2089
2090 server_cache_hostname(s);
2091 server_cache_boot_id(s);
2092 server_cache_machine_id(s);
2093
2094 s->runtime_storage.name = "Runtime journal";
2095 s->system_storage.name = "System journal";
2096
2097 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2098 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2099 if (!s->runtime_storage.path || !s->system_storage.path)
2100 return -ENOMEM;
2101
2102 (void) server_connect_notify(s);
2103
2104 return system_journal_open(s, false);
2105 }
2106
2107 void server_maybe_append_tags(Server *s) {
2108 #ifdef HAVE_GCRYPT
2109 JournalFile *f;
2110 Iterator i;
2111 usec_t n;
2112
2113 n = now(CLOCK_REALTIME);
2114
2115 if (s->system_journal)
2116 journal_file_maybe_append_tag(s->system_journal, n);
2117
2118 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2119 journal_file_maybe_append_tag(f, n);
2120 #endif
2121 }
2122
2123 void server_done(Server *s) {
2124 JournalFile *f;
2125 assert(s);
2126
2127 if (s->deferred_closes) {
2128 journal_file_close_set(s->deferred_closes);
2129 set_free(s->deferred_closes);
2130 }
2131
2132 while (s->stdout_streams)
2133 stdout_stream_free(s->stdout_streams);
2134
2135 if (s->system_journal)
2136 (void) journal_file_close(s->system_journal);
2137
2138 if (s->runtime_journal)
2139 (void) journal_file_close(s->runtime_journal);
2140
2141 while ((f = ordered_hashmap_steal_first(s->user_journals)))
2142 (void) journal_file_close(f);
2143
2144 ordered_hashmap_free(s->user_journals);
2145
2146 sd_event_source_unref(s->syslog_event_source);
2147 sd_event_source_unref(s->native_event_source);
2148 sd_event_source_unref(s->stdout_event_source);
2149 sd_event_source_unref(s->dev_kmsg_event_source);
2150 sd_event_source_unref(s->audit_event_source);
2151 sd_event_source_unref(s->sync_event_source);
2152 sd_event_source_unref(s->sigusr1_event_source);
2153 sd_event_source_unref(s->sigusr2_event_source);
2154 sd_event_source_unref(s->sigterm_event_source);
2155 sd_event_source_unref(s->sigint_event_source);
2156 sd_event_source_unref(s->sigrtmin1_event_source);
2157 sd_event_source_unref(s->hostname_event_source);
2158 sd_event_source_unref(s->notify_event_source);
2159 sd_event_source_unref(s->watchdog_event_source);
2160 sd_event_unref(s->event);
2161
2162 safe_close(s->syslog_fd);
2163 safe_close(s->native_fd);
2164 safe_close(s->stdout_fd);
2165 safe_close(s->dev_kmsg_fd);
2166 safe_close(s->audit_fd);
2167 safe_close(s->hostname_fd);
2168 safe_close(s->notify_fd);
2169
2170 if (s->rate_limit)
2171 journal_rate_limit_free(s->rate_limit);
2172
2173 if (s->kernel_seqnum)
2174 munmap(s->kernel_seqnum, sizeof(uint64_t));
2175
2176 free(s->buffer);
2177 free(s->tty_path);
2178 free(s->cgroup_root);
2179 free(s->hostname_field);
2180
2181 if (s->mmap)
2182 mmap_cache_unref(s->mmap);
2183
2184 udev_unref(s->udev);
2185 }
2186
2187 static const char* const storage_table[_STORAGE_MAX] = {
2188 [STORAGE_AUTO] = "auto",
2189 [STORAGE_VOLATILE] = "volatile",
2190 [STORAGE_PERSISTENT] = "persistent",
2191 [STORAGE_NONE] = "none"
2192 };
2193
2194 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2195 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2196
2197 static const char* const split_mode_table[_SPLIT_MAX] = {
2198 [SPLIT_LOGIN] = "login",
2199 [SPLIT_UID] = "uid",
2200 [SPLIT_NONE] = "none",
2201 };
2202
2203 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2204 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");