]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
e7dcbba04f286474fec10a14aec40627714a5620
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #ifdef HAVE_SELINUX
21 #include <selinux/selinux.h>
22 #endif
23 #include <sys/ioctl.h>
24 #include <sys/mman.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
28
29 #include "libudev.h"
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33
34 #include "acl-util.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "formats-util.h"
44 #include "fs-util.h"
45 #include "hashmap.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "log.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
75
76 #define USER_JOURNALS_MAX 1024
77
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
82
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
84
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
86
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
90 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
94
95 assert(ret_used);
96 assert(ret_free);
97
98 d = opendir(path);
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
101 errno, "Failed to open %s: %m", path);
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127 }
128
129 static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131 }
132
133 static int cache_space_refresh(Server *s, JournalStorage *storage) {
134
135 _cleanup_closedir_ DIR *d = NULL;
136 JournalStorageSpace *space;
137 JournalMetrics *metrics;
138 uint64_t vfs_used, vfs_avail, avail;
139 usec_t ts;
140 int r;
141
142 assert(s);
143
144 metrics = &storage->metrics;
145 space = &storage->space;
146
147 ts = now(CLOCK_MONOTONIC);
148
149 if (space->timestamp + RECHECK_SPACE_USEC > ts)
150 return 0;
151
152 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
153 if (r < 0)
154 return r;
155
156 space->vfs_used = vfs_used;
157 space->vfs_available = vfs_avail;
158
159 avail = LESS_BY(vfs_avail, metrics->keep_free);
160
161 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
162 space->available = LESS_BY(space->limit, vfs_used);
163 space->timestamp = ts;
164 return 1;
165 }
166
167 static void patch_min_use(JournalStorage *storage) {
168 assert(storage);
169
170 /* Let's bump the min_use limit to the current usage on disk. We do
171 * this when starting up and first opening the journal files. This way
172 * sudden spikes in disk usage will not cause journald to vacuum files
173 * without bounds. Note that this means that only a restart of journald
174 * will make it reset this value. */
175
176 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
177 }
178
179
180 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
181 JournalStorage *js;
182 int r;
183
184 assert(s);
185
186 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
187
188 r = cache_space_refresh(s, js);
189 if (r >= 0) {
190 if (available)
191 *available = js->space.available;
192 if (limit)
193 *limit = js->space.limit;
194 }
195 return r;
196 }
197
198 void server_space_usage_message(Server *s, JournalStorage *storage) {
199 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
200 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
201 JournalMetrics *metrics;
202
203 assert(s);
204
205 if (!storage)
206 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
207
208 if (cache_space_refresh(s, storage) < 0)
209 return;
210
211 metrics = &storage->metrics;
212 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
213 format_bytes(fb2, sizeof(fb2), metrics->max_use);
214 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
215 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
216 format_bytes(fb5, sizeof(fb5), storage->space.limit);
217 format_bytes(fb6, sizeof(fb6), storage->space.available);
218
219 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
220 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
221 storage->name, storage->path, fb1, fb5, fb6),
222 "JOURNAL_NAME=%s", storage->name,
223 "JOURNAL_PATH=%s", storage->path,
224 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
225 "CURRENT_USE_PRETTY=%s", fb1,
226 "MAX_USE=%"PRIu64, metrics->max_use,
227 "MAX_USE_PRETTY=%s", fb2,
228 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
229 "DISK_KEEP_FREE_PRETTY=%s", fb3,
230 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
231 "DISK_AVAILABLE_PRETTY=%s", fb4,
232 "LIMIT=%"PRIu64, storage->space.limit,
233 "LIMIT_PRETTY=%s", fb5,
234 "AVAILABLE=%"PRIu64, storage->space.available,
235 "AVAILABLE_PRETTY=%s", fb6,
236 NULL);
237 }
238
239 static void server_add_acls(JournalFile *f, uid_t uid) {
240 #ifdef HAVE_ACL
241 int r;
242 #endif
243 assert(f);
244
245 #ifdef HAVE_ACL
246 if (uid <= SYSTEM_UID_MAX)
247 return;
248
249 r = add_acls_for_user(f->fd, uid);
250 if (r < 0)
251 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
252 #endif
253 }
254
255 static int open_journal(
256 Server *s,
257 bool reliably,
258 const char *fname,
259 int flags,
260 bool seal,
261 JournalMetrics *metrics,
262 JournalFile **ret) {
263 int r;
264 JournalFile *f;
265
266 assert(s);
267 assert(fname);
268 assert(ret);
269
270 if (reliably)
271 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
272 else
273 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
274 if (r < 0)
275 return r;
276
277 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
278 if (r < 0) {
279 (void) journal_file_close(f);
280 return r;
281 }
282
283 *ret = f;
284 return r;
285 }
286
/* Tells whether the flag file /run/systemd/journal/flushed exists (as far as access() can see it). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
290
291 static int system_journal_open(Server *s, bool flush_requested) {
292 bool flushed = false;
293 const char *fn;
294 int r = 0;
295
296 if (!s->system_journal &&
297 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
298 (flush_requested || (flushed = flushed_flag_is_set()))) {
299
300 /* If in auto mode: first try to create the machine
301 * path, but not the prefix.
302 *
303 * If in persistent mode: create /var/log/journal and
304 * the machine path */
305
306 if (s->storage == STORAGE_PERSISTENT)
307 (void) mkdir_p("/var/log/journal/", 0755);
308
309 (void) mkdir(s->system_storage.path, 0755);
310
311 fn = strjoina(s->system_storage.path, "/system.journal");
312 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
313 if (r >= 0) {
314 server_add_acls(s->system_journal, 0);
315 (void) cache_space_refresh(s, &s->system_storage);
316 patch_min_use(&s->system_storage);
317 } else if (r < 0) {
318 if (r != -ENOENT && r != -EROFS)
319 log_warning_errno(r, "Failed to open system journal: %m");
320
321 r = 0;
322 }
323
324 /* If the runtime journal is open, and we're post-flush, we're
325 * recovering from a failed system journal rotate (ENOSPC)
326 * for which the runtime journal was reopened.
327 *
328 * Perform an implicit flush to var, leaving the runtime
329 * journal closed, now that the system journal is back.
330 */
331 if (s->runtime_journal && flushed)
332 (void) server_flush_to_var(s);
333 }
334
335 if (!s->runtime_journal &&
336 (s->storage != STORAGE_NONE)) {
337
338 fn = strjoina(s->runtime_storage.path, "/system.journal");
339
340 if (s->system_journal) {
341
342 /* Try to open the runtime journal, but only
343 * if it already exists, so that we can flush
344 * it into the system journal */
345
346 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
347 if (r < 0) {
348 if (r != -ENOENT)
349 log_warning_errno(r, "Failed to open runtime journal: %m");
350
351 r = 0;
352 }
353
354 } else {
355
356 /* OK, we really need the runtime journal, so create
357 * it if necessary. */
358
359 (void) mkdir("/run/log", 0755);
360 (void) mkdir("/run/log/journal", 0755);
361 (void) mkdir_parents(fn, 0750);
362
363 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
364 if (r < 0)
365 return log_error_errno(r, "Failed to open runtime journal: %m");
366 }
367
368 if (s->runtime_journal) {
369 server_add_acls(s->runtime_journal, 0);
370 (void) cache_space_refresh(s, &s->runtime_storage);
371 patch_min_use(&s->runtime_storage);
372 }
373 }
374
375 return r;
376 }
377
378 static JournalFile* find_journal(Server *s, uid_t uid) {
379 _cleanup_free_ char *p = NULL;
380 int r;
381 JournalFile *f;
382 sd_id128_t machine;
383
384 assert(s);
385
386 /* A rotate that fails to create the new journal (ENOSPC) leaves the
387 * rotated journal as NULL. Unless we revisit opening, even after
388 * space is made available we'll continue to return NULL indefinitely.
389 *
390 * system_journal_open() is a noop if the journals are already open, so
391 * we can just call it here to recover from failed rotates (or anything
392 * else that's left the journals as NULL).
393 *
394 * Fixes https://github.com/systemd/systemd/issues/3968 */
395 (void) system_journal_open(s, false);
396
397 /* We split up user logs only on /var, not on /run. If the
398 * runtime file is open, we write to it exclusively, in order
399 * to guarantee proper order as soon as we flush /run to
400 * /var and close the runtime file. */
401
402 if (s->runtime_journal)
403 return s->runtime_journal;
404
405 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
406 return s->system_journal;
407
408 r = sd_id128_get_machine(&machine);
409 if (r < 0)
410 return s->system_journal;
411
412 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
413 if (f)
414 return f;
415
416 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
417 SD_ID128_FORMAT_VAL(machine), uid) < 0)
418 return s->system_journal;
419
420 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
421 /* Too many open? Then let's close one */
422 f = ordered_hashmap_steal_first(s->user_journals);
423 assert(f);
424 (void) journal_file_close(f);
425 }
426
427 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
428 if (r < 0)
429 return s->system_journal;
430
431 server_add_acls(f, uid);
432
433 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
434 if (r < 0) {
435 (void) journal_file_close(f);
436 return s->system_journal;
437 }
438
439 return f;
440 }
441
442 static int do_rotate(
443 Server *s,
444 JournalFile **f,
445 const char* name,
446 bool seal,
447 uint32_t uid) {
448
449 int r;
450 assert(s);
451
452 if (!*f)
453 return -EINVAL;
454
455 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
456 if (r < 0)
457 if (*f)
458 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
459 else
460 log_error_errno(r, "Failed to create new %s journal: %m", name);
461 else
462 server_add_acls(*f, uid);
463
464 return r;
465 }
466
467 void server_rotate(Server *s) {
468 JournalFile *f;
469 void *k;
470 Iterator i;
471 int r;
472
473 log_debug("Rotating...");
474
475 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
476 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
477
478 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
479 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
480 if (r >= 0)
481 ordered_hashmap_replace(s->user_journals, k, f);
482 else if (!f)
483 /* Old file has been closed and deallocated */
484 ordered_hashmap_remove(s->user_journals, k);
485 }
486
487 /* Perform any deferred closes which aren't still offlining. */
488 SET_FOREACH(f, s->deferred_closes, i)
489 if (!journal_file_is_offlining(f)) {
490 (void) set_remove(s->deferred_closes, f);
491 (void) journal_file_close(f);
492 }
493 }
494
495 void server_sync(Server *s) {
496 JournalFile *f;
497 Iterator i;
498 int r;
499
500 if (s->system_journal) {
501 r = journal_file_set_offline(s->system_journal, false);
502 if (r < 0)
503 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
504 }
505
506 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
507 r = journal_file_set_offline(f, false);
508 if (r < 0)
509 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
510 }
511
512 if (s->sync_event_source) {
513 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
514 if (r < 0)
515 log_error_errno(r, "Failed to disable sync timer source: %m");
516 }
517
518 s->sync_scheduled = false;
519 }
520
521 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
522
523 int r;
524
525 assert(s);
526 assert(storage);
527
528 (void) cache_space_refresh(s, storage);
529
530 if (verbose)
531 server_space_usage_message(s, storage);
532
533 r = journal_directory_vacuum(storage->path, storage->space.limit,
534 storage->metrics.n_max_files, s->max_retention_usec,
535 &s->oldest_file_usec, verbose);
536 if (r < 0 && r != -ENOENT)
537 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
538
539 cache_space_invalidate(&storage->space);
540 }
541
542 int server_vacuum(Server *s, bool verbose) {
543 assert(s);
544
545 log_debug("Vacuuming...");
546
547 s->oldest_file_usec = 0;
548
549 if (s->system_journal)
550 do_vacuum(s, &s->system_storage, verbose);
551 if (s->runtime_journal)
552 do_vacuum(s, &s->runtime_storage, verbose);
553
554 return 0;
555 }
556
557 static void server_cache_machine_id(Server *s) {
558 sd_id128_t id;
559 int r;
560
561 assert(s);
562
563 r = sd_id128_get_machine(&id);
564 if (r < 0)
565 return;
566
567 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
568 }
569
570 static void server_cache_boot_id(Server *s) {
571 sd_id128_t id;
572 int r;
573
574 assert(s);
575
576 r = sd_id128_get_boot(&id);
577 if (r < 0)
578 return;
579
580 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
581 }
582
583 static void server_cache_hostname(Server *s) {
584 _cleanup_free_ char *t = NULL;
585 char *x;
586
587 assert(s);
588
589 t = gethostname_malloc();
590 if (!t)
591 return;
592
593 x = strappend("_HOSTNAME=", t);
594 if (!x)
595 return;
596
597 free(s->hostname_field);
598 s->hostname_field = x;
599 }
600
601 static bool shall_try_append_again(JournalFile *f, int r) {
602 switch(r) {
603
604 case -E2BIG: /* Hit configured limit */
605 case -EFBIG: /* Hit fs limit */
606 case -EDQUOT: /* Quota limit hit */
607 case -ENOSPC: /* Disk full */
608 log_debug("%s: Allocation limit reached, rotating.", f->path);
609 return true;
610
611 case -EIO: /* I/O error of some kind (mmap) */
612 log_warning("%s: IO error, rotating.", f->path);
613 return true;
614
615 case -EHOSTDOWN: /* Other machine */
616 log_info("%s: Journal file from other machine, rotating.", f->path);
617 return true;
618
619 case -EBUSY: /* Unclean shutdown */
620 log_info("%s: Unclean shutdown, rotating.", f->path);
621 return true;
622
623 case -EPROTONOSUPPORT: /* Unsupported feature */
624 log_info("%s: Unsupported feature, rotating.", f->path);
625 return true;
626
627 case -EBADMSG: /* Corrupted */
628 case -ENODATA: /* Truncated */
629 case -ESHUTDOWN: /* Already archived */
630 log_warning("%s: Journal file corrupted, rotating.", f->path);
631 return true;
632
633 case -EIDRM: /* Journal file has been deleted */
634 log_warning("%s: Journal file has been deleted, rotating.", f->path);
635 return true;
636
637 case -ETXTBSY: /* Journal file is from the future */
638 log_warning("%s: Journal file is from the future, rotating.", f->path);
639 return true;
640
641 default:
642 return false;
643 }
644 }
645
646 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
647 bool vacuumed = false, rotate = false;
648 struct dual_timestamp ts;
649 JournalFile *f;
650 int r;
651
652 assert(s);
653 assert(iovec);
654 assert(n > 0);
655
656 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
657 * the source time, and not even the time the event was originally seen, but instead simply the time we started
658 * processing it, as we want strictly linear ordering in what we write out.) */
659 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
660 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
661
662 if (ts.realtime < s->last_realtime_clock) {
663 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
664 * regular operation. However, when it does happen, then we should make sure that we start fresh files
665 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
666 * bisection works correctly. */
667
668 log_debug("Time jumped backwards, rotating.");
669 rotate = true;
670 } else {
671
672 f = find_journal(s, uid);
673 if (!f)
674 return;
675
676 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
677 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
678 rotate = true;
679 }
680 }
681
682 if (rotate) {
683 server_rotate(s);
684 server_vacuum(s, false);
685 vacuumed = true;
686
687 f = find_journal(s, uid);
688 if (!f)
689 return;
690 }
691
692 s->last_realtime_clock = ts.realtime;
693
694 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
695 if (r >= 0) {
696 server_schedule_sync(s, priority);
697 return;
698 }
699
700 if (vacuumed || !shall_try_append_again(f, r)) {
701 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
702 return;
703 }
704
705 server_rotate(s);
706 server_vacuum(s, false);
707
708 f = find_journal(s, uid);
709 if (!f)
710 return;
711
712 log_debug("Retrying write.");
713 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
714 if (r < 0)
715 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
716 else
717 server_schedule_sync(s, priority);
718 }
719
720 static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
721 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
722 char *copy, ids[SD_ID128_STRING_MAX];
723 int r;
724
725 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
726 * on the cgroup path. */
727
728 r = cg_slice_to_path(slice, &slice_path);
729 if (r < 0)
730 return r;
731
732 escaped = cg_escape(unit);
733 if (!escaped)
734 return -ENOMEM;
735
736 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
737 if (!p)
738 return -ENOMEM;
739
740 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
741 if (r < 0)
742 return r;
743 if (r != 32)
744 return -EINVAL;
745 ids[32] = 0;
746
747 if (!id128_is_valid(ids))
748 return -EINVAL;
749
750 copy = strdup(ids);
751 if (!copy)
752 return -ENOMEM;
753
754 *ret = copy;
755 return 0;
756 }
757
758 static void dispatch_message_real(
759 Server *s,
760 struct iovec *iovec, unsigned n, unsigned m,
761 const struct ucred *ucred,
762 const struct timeval *tv,
763 const char *label, size_t label_len,
764 const char *unit_id,
765 int priority,
766 pid_t object_pid) {
767
768 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
769 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
770 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
771 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
772 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
773 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
774 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
775 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
776 uid_t object_uid;
777 gid_t object_gid;
778 char *x;
779 int r;
780 char *t, *c;
781 uid_t realuid = 0, owner = 0, journal_uid;
782 bool owner_valid = false;
783 #ifdef HAVE_AUDIT
784 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
786 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
787 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
788
789 uint32_t audit;
790 uid_t loginuid;
791 #endif
792
793 assert(s);
794 assert(iovec);
795 assert(n > 0);
796 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
797
798 if (ucred) {
799 realuid = ucred->uid;
800
801 sprintf(pid, "_PID="PID_FMT, ucred->pid);
802 IOVEC_SET_STRING(iovec[n++], pid);
803
804 sprintf(uid, "_UID="UID_FMT, ucred->uid);
805 IOVEC_SET_STRING(iovec[n++], uid);
806
807 sprintf(gid, "_GID="GID_FMT, ucred->gid);
808 IOVEC_SET_STRING(iovec[n++], gid);
809
810 r = get_process_comm(ucred->pid, &t);
811 if (r >= 0) {
812 x = strjoina("_COMM=", t);
813 free(t);
814 IOVEC_SET_STRING(iovec[n++], x);
815 }
816
817 r = get_process_exe(ucred->pid, &t);
818 if (r >= 0) {
819 x = strjoina("_EXE=", t);
820 free(t);
821 IOVEC_SET_STRING(iovec[n++], x);
822 }
823
824 r = get_process_cmdline(ucred->pid, 0, false, &t);
825 if (r >= 0) {
826 x = strjoina("_CMDLINE=", t);
827 free(t);
828 IOVEC_SET_STRING(iovec[n++], x);
829 }
830
831 r = get_process_capeff(ucred->pid, &t);
832 if (r >= 0) {
833 x = strjoina("_CAP_EFFECTIVE=", t);
834 free(t);
835 IOVEC_SET_STRING(iovec[n++], x);
836 }
837
838 #ifdef HAVE_AUDIT
839 r = audit_session_from_pid(ucred->pid, &audit);
840 if (r >= 0) {
841 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
842 IOVEC_SET_STRING(iovec[n++], audit_session);
843 }
844
845 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
846 if (r >= 0) {
847 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
848 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
849 }
850 #endif
851
852 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
853 if (r >= 0) {
854 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
855 char *session = NULL;
856
857 x = strjoina("_SYSTEMD_CGROUP=", c);
858 IOVEC_SET_STRING(iovec[n++], x);
859
860 r = cg_path_get_session(c, &t);
861 if (r >= 0) {
862 session = strjoina("_SYSTEMD_SESSION=", t);
863 free(t);
864 IOVEC_SET_STRING(iovec[n++], session);
865 }
866
867 if (cg_path_get_owner_uid(c, &owner) >= 0) {
868 owner_valid = true;
869
870 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
871 IOVEC_SET_STRING(iovec[n++], owner_uid);
872 }
873
874 if (cg_path_get_unit(c, &raw_unit) >= 0) {
875 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
876 IOVEC_SET_STRING(iovec[n++], x);
877 } else if (unit_id && !session) {
878 x = strjoina("_SYSTEMD_UNIT=", unit_id);
879 IOVEC_SET_STRING(iovec[n++], x);
880 }
881
882 if (cg_path_get_user_unit(c, &t) >= 0) {
883 x = strjoina("_SYSTEMD_USER_UNIT=", t);
884 free(t);
885 IOVEC_SET_STRING(iovec[n++], x);
886 } else if (unit_id && session) {
887 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
888 IOVEC_SET_STRING(iovec[n++], x);
889 }
890
891 if (cg_path_get_slice(c, &raw_slice) >= 0) {
892 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
893 IOVEC_SET_STRING(iovec[n++], x);
894 }
895
896 if (cg_path_get_user_slice(c, &t) >= 0) {
897 x = strjoina("_SYSTEMD_USER_SLICE=", t);
898 free(t);
899 IOVEC_SET_STRING(iovec[n++], x);
900 }
901
902 if (raw_slice && raw_unit) {
903 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
904 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
905 free(t);
906 IOVEC_SET_STRING(iovec[n++], x);
907 }
908 }
909
910 free(c);
911 } else if (unit_id) {
912 x = strjoina("_SYSTEMD_UNIT=", unit_id);
913 IOVEC_SET_STRING(iovec[n++], x);
914 }
915
916 #ifdef HAVE_SELINUX
917 if (mac_selinux_have()) {
918 if (label) {
919 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
920
921 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
922 IOVEC_SET_STRING(iovec[n++], x);
923 } else {
924 char *con;
925
926 if (getpidcon(ucred->pid, &con) >= 0) {
927 x = strjoina("_SELINUX_CONTEXT=", con);
928
929 freecon(con);
930 IOVEC_SET_STRING(iovec[n++], x);
931 }
932 }
933 }
934 #endif
935 }
936 assert(n <= m);
937
938 if (object_pid) {
939 r = get_process_uid(object_pid, &object_uid);
940 if (r >= 0) {
941 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
942 IOVEC_SET_STRING(iovec[n++], o_uid);
943 }
944
945 r = get_process_gid(object_pid, &object_gid);
946 if (r >= 0) {
947 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
948 IOVEC_SET_STRING(iovec[n++], o_gid);
949 }
950
951 r = get_process_comm(object_pid, &t);
952 if (r >= 0) {
953 x = strjoina("OBJECT_COMM=", t);
954 free(t);
955 IOVEC_SET_STRING(iovec[n++], x);
956 }
957
958 r = get_process_exe(object_pid, &t);
959 if (r >= 0) {
960 x = strjoina("OBJECT_EXE=", t);
961 free(t);
962 IOVEC_SET_STRING(iovec[n++], x);
963 }
964
965 r = get_process_cmdline(object_pid, 0, false, &t);
966 if (r >= 0) {
967 x = strjoina("OBJECT_CMDLINE=", t);
968 free(t);
969 IOVEC_SET_STRING(iovec[n++], x);
970 }
971
972 #ifdef HAVE_AUDIT
973 r = audit_session_from_pid(object_pid, &audit);
974 if (r >= 0) {
975 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
976 IOVEC_SET_STRING(iovec[n++], o_audit_session);
977 }
978
979 r = audit_loginuid_from_pid(object_pid, &loginuid);
980 if (r >= 0) {
981 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
982 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
983 }
984 #endif
985
986 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
987 if (r >= 0) {
988 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
989 IOVEC_SET_STRING(iovec[n++], x);
990
991 r = cg_path_get_session(c, &t);
992 if (r >= 0) {
993 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
994 free(t);
995 IOVEC_SET_STRING(iovec[n++], x);
996 }
997
998 if (cg_path_get_owner_uid(c, &owner) >= 0) {
999 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
1000 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1001 }
1002
1003 if (cg_path_get_unit(c, &t) >= 0) {
1004 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
1005 free(t);
1006 IOVEC_SET_STRING(iovec[n++], x);
1007 }
1008
1009 if (cg_path_get_user_unit(c, &t) >= 0) {
1010 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
1011 free(t);
1012 IOVEC_SET_STRING(iovec[n++], x);
1013 }
1014
1015 if (cg_path_get_slice(c, &t) >= 0) {
1016 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1017 free(t);
1018 IOVEC_SET_STRING(iovec[n++], x);
1019 }
1020
1021 if (cg_path_get_user_slice(c, &t) >= 0) {
1022 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1023 free(t);
1024 IOVEC_SET_STRING(iovec[n++], x);
1025 }
1026
1027 free(c);
1028 }
1029 }
1030 assert(n <= m);
1031
1032 if (tv) {
1033 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
1034 IOVEC_SET_STRING(iovec[n++], source_time);
1035 }
1036
1037 /* Note that strictly speaking storing the boot id here is
1038 * redundant since the entry includes this in-line
1039 * anyway. However, we need this indexed, too. */
1040 if (!isempty(s->boot_id_field))
1041 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
1042
1043 if (!isempty(s->machine_id_field))
1044 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
1045
1046 if (!isempty(s->hostname_field))
1047 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
1048
1049 assert(n <= m);
1050
1051 if (s->split_mode == SPLIT_UID && realuid > 0)
1052 /* Split up strictly by any UID */
1053 journal_uid = realuid;
1054 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
1055 /* Split up by login UIDs. We do this only if the
1056 * realuid is not root, in order not to accidentally
1057 * leak privileged information to the user that is
1058 * logged by a privileged process that is part of an
1059 * unprivileged session. */
1060 journal_uid = owner;
1061 else
1062 journal_uid = 0;
1063
1064 write_to_journal(s, journal_uid, iovec, n, priority);
1065 }
1066
1067 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1068 char mid[11 + 32 + 1];
1069 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1070 unsigned n = 0, m;
1071 int r;
1072 va_list ap;
1073 struct ucred ucred = {};
1074
1075 assert(s);
1076 assert(format);
1077
1078 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1079 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1080 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1081
1082 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
1083 assert_cc(6 == LOG_INFO);
1084 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
1085
1086 if (!sd_id128_is_null(message_id)) {
1087 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
1088 IOVEC_SET_STRING(iovec[n++], mid);
1089 }
1090
1091 m = n;
1092
1093 va_start(ap, format);
1094 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1095 /* Error handling below */
1096 va_end(ap);
1097
1098 ucred.pid = getpid();
1099 ucred.uid = getuid();
1100 ucred.gid = getgid();
1101
1102 if (r >= 0)
1103 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1104
1105 while (m < n)
1106 free(iovec[m++].iov_base);
1107
1108 if (r < 0) {
1109 /* We failed to format the message. Emit a warning instead. */
1110 char buf[LINE_MAX];
1111
1112 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1113
1114 n = 3;
1115 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec[n++], buf);
1117 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1118 }
1119 }
1120
1121 void server_dispatch_message(
1122 Server *s,
1123 struct iovec *iovec, unsigned n, unsigned m,
1124 const struct ucred *ucred,
1125 const struct timeval *tv,
1126 const char *label, size_t label_len,
1127 const char *unit_id,
1128 int priority,
1129 pid_t object_pid) {
1130
1131 int rl, r;
1132 _cleanup_free_ char *path = NULL;
1133 uint64_t available = 0;
1134 char *c;
1135
1136 assert(s);
1137 assert(iovec || n == 0);
1138
1139 if (n == 0)
1140 return;
1141
1142 if (LOG_PRI(priority) > s->max_level_store)
1143 return;
1144
1145 /* Stop early in case the information will not be stored
1146 * in a journal. */
1147 if (s->storage == STORAGE_NONE)
1148 return;
1149
1150 if (!ucred)
1151 goto finish;
1152
1153 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
1154 if (r < 0)
1155 goto finish;
1156
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1159 *
1160 * So let's cut of everything past the third /, since that is
1161 * where user directories start */
1162
1163 c = strchr(path, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c) {
1167 c = strchr(c+1, '/');
1168 if (c)
1169 *c = 0;
1170 }
1171 }
1172
1173 (void) determine_space(s, &available, NULL);
1174 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
1175 if (rl == 0)
1176 return;
1177
1178 /* Write a suppression message if we suppressed something */
1179 if (rl > 1)
1180 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1182 NULL);
1183
1184 finish:
1185 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
1186 }
1187
1188 int server_flush_to_var(Server *s) {
1189 sd_id128_t machine;
1190 sd_journal *j = NULL;
1191 char ts[FORMAT_TIMESPAN_MAX];
1192 usec_t start;
1193 unsigned n = 0;
1194 int r;
1195
1196 assert(s);
1197
1198 if (s->storage != STORAGE_AUTO &&
1199 s->storage != STORAGE_PERSISTENT)
1200 return 0;
1201
1202 if (!s->runtime_journal)
1203 return 0;
1204
1205 (void) system_journal_open(s, true);
1206
1207 if (!s->system_journal)
1208 return 0;
1209
1210 log_debug("Flushing to /var...");
1211
1212 start = now(CLOCK_MONOTONIC);
1213
1214 r = sd_id128_get_machine(&machine);
1215 if (r < 0)
1216 return r;
1217
1218 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1219 if (r < 0)
1220 return log_error_errno(r, "Failed to read runtime journal: %m");
1221
1222 sd_journal_set_data_threshold(j, 0);
1223
1224 SD_JOURNAL_FOREACH(j) {
1225 Object *o = NULL;
1226 JournalFile *f;
1227
1228 f = j->current_file;
1229 assert(f && f->current_offset > 0);
1230
1231 n++;
1232
1233 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1234 if (r < 0) {
1235 log_error_errno(r, "Can't read entry: %m");
1236 goto finish;
1237 }
1238
1239 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1240 if (r >= 0)
1241 continue;
1242
1243 if (!shall_try_append_again(s->system_journal, r)) {
1244 log_error_errno(r, "Can't write entry: %m");
1245 goto finish;
1246 }
1247
1248 server_rotate(s);
1249 server_vacuum(s, false);
1250
1251 if (!s->system_journal) {
1252 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1253 r = -EIO;
1254 goto finish;
1255 }
1256
1257 log_debug("Retrying write.");
1258 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1259 if (r < 0) {
1260 log_error_errno(r, "Can't write entry: %m");
1261 goto finish;
1262 }
1263 }
1264
1265 r = 0;
1266
1267 finish:
1268 journal_file_post_change(s->system_journal);
1269
1270 s->runtime_journal = journal_file_close(s->runtime_journal);
1271
1272 if (r >= 0)
1273 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1274
1275 sd_journal_close(j);
1276
1277 server_driver_message(s, SD_ID128_NULL,
1278 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1279 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1280 n),
1281 NULL);
1282
1283 return r;
1284 }
1285
1286 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1287 Server *s = userdata;
1288 struct ucred *ucred = NULL;
1289 struct timeval *tv = NULL;
1290 struct cmsghdr *cmsg;
1291 char *label = NULL;
1292 size_t label_len = 0, m;
1293 struct iovec iovec;
1294 ssize_t n;
1295 int *fds = NULL, v = 0;
1296 unsigned n_fds = 0;
1297
1298 union {
1299 struct cmsghdr cmsghdr;
1300
1301 /* We use NAME_MAX space for the SELinux label
1302 * here. The kernel currently enforces no
1303 * limit, but according to suggestions from
1304 * the SELinux people this will change and it
1305 * will probably be identical to NAME_MAX. For
1306 * now we use that, but this should be updated
1307 * one day when the final limit is known. */
1308 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1309 CMSG_SPACE(sizeof(struct timeval)) +
1310 CMSG_SPACE(sizeof(int)) + /* fd */
1311 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1312 } control = {};
1313
1314 union sockaddr_union sa = {};
1315
1316 struct msghdr msghdr = {
1317 .msg_iov = &iovec,
1318 .msg_iovlen = 1,
1319 .msg_control = &control,
1320 .msg_controllen = sizeof(control),
1321 .msg_name = &sa,
1322 .msg_namelen = sizeof(sa),
1323 };
1324
1325 assert(s);
1326 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1327
1328 if (revents != EPOLLIN) {
1329 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1330 return -EIO;
1331 }
1332
1333 /* Try to get the right size, if we can. (Not all
1334 * sockets support SIOCINQ, hence we just try, but
1335 * don't rely on it. */
1336 (void) ioctl(fd, SIOCINQ, &v);
1337
1338 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1339 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1340 (size_t) LINE_MAX,
1341 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1342
1343 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1344 return log_oom();
1345
1346 iovec.iov_base = s->buffer;
1347 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1348
1349 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1350 if (n < 0) {
1351 if (errno == EINTR || errno == EAGAIN)
1352 return 0;
1353
1354 return log_error_errno(errno, "recvmsg() failed: %m");
1355 }
1356
1357 CMSG_FOREACH(cmsg, &msghdr) {
1358
1359 if (cmsg->cmsg_level == SOL_SOCKET &&
1360 cmsg->cmsg_type == SCM_CREDENTIALS &&
1361 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1362 ucred = (struct ucred*) CMSG_DATA(cmsg);
1363 else if (cmsg->cmsg_level == SOL_SOCKET &&
1364 cmsg->cmsg_type == SCM_SECURITY) {
1365 label = (char*) CMSG_DATA(cmsg);
1366 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1367 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1368 cmsg->cmsg_type == SO_TIMESTAMP &&
1369 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1370 tv = (struct timeval*) CMSG_DATA(cmsg);
1371 else if (cmsg->cmsg_level == SOL_SOCKET &&
1372 cmsg->cmsg_type == SCM_RIGHTS) {
1373 fds = (int*) CMSG_DATA(cmsg);
1374 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1375 }
1376 }
1377
1378 /* And a trailing NUL, just in case */
1379 s->buffer[n] = 0;
1380
1381 if (fd == s->syslog_fd) {
1382 if (n > 0 && n_fds == 0)
1383 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1384 else if (n_fds > 0)
1385 log_warning("Got file descriptors via syslog socket. Ignoring.");
1386
1387 } else if (fd == s->native_fd) {
1388 if (n > 0 && n_fds == 0)
1389 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1390 else if (n == 0 && n_fds == 1)
1391 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1392 else if (n_fds > 0)
1393 log_warning("Got too many file descriptors via native socket. Ignoring.");
1394
1395 } else {
1396 assert(fd == s->audit_fd);
1397
1398 if (n > 0 && n_fds == 0)
1399 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1400 else if (n_fds > 0)
1401 log_warning("Got file descriptors via audit socket. Ignoring.");
1402 }
1403
1404 close_many(fds, n_fds);
1405 return 0;
1406 }
1407
1408 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1409 Server *s = userdata;
1410 int r;
1411
1412 assert(s);
1413
1414 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1415
1416 (void) server_flush_to_var(s);
1417 server_sync(s);
1418 server_vacuum(s, false);
1419
1420 r = touch("/run/systemd/journal/flushed");
1421 if (r < 0)
1422 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1423
1424 server_space_usage_message(s, NULL);
1425 return 0;
1426 }
1427
1428 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1429 Server *s = userdata;
1430 int r;
1431
1432 assert(s);
1433
1434 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1435 server_rotate(s);
1436 server_vacuum(s, true);
1437
1438 if (s->system_journal)
1439 patch_min_use(&s->system_storage);
1440 if (s->runtime_journal)
1441 patch_min_use(&s->runtime_storage);
1442
1443 /* Let clients know when the most recent rotation happened. */
1444 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1445 if (r < 0)
1446 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1447
1448 return 0;
1449 }
1450
1451 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1452 Server *s = userdata;
1453
1454 assert(s);
1455
1456 log_received_signal(LOG_INFO, si);
1457
1458 sd_event_exit(s->event, 0);
1459 return 0;
1460 }
1461
1462 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1463 Server *s = userdata;
1464 int r;
1465
1466 assert(s);
1467
1468 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1469
1470 server_sync(s);
1471
1472 /* Let clients know when the most recent sync happened. */
1473 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1474 if (r < 0)
1475 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1476
1477 return 0;
1478 }
1479
1480 static int setup_signals(Server *s) {
1481 int r;
1482
1483 assert(s);
1484
1485 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1486
1487 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1488 if (r < 0)
1489 return r;
1490
1491 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1492 if (r < 0)
1493 return r;
1494
1495 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1496 if (r < 0)
1497 return r;
1498
1499 /* Let's process SIGTERM late, so that we flush all queued
1500 * messages to disk before we exit */
1501 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1502 if (r < 0)
1503 return r;
1504
1505 /* When journald is invoked on the terminal (when debugging),
1506 * it's useful if C-c is handled equivalent to SIGTERM. */
1507 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1508 if (r < 0)
1509 return r;
1510
1511 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1512 if (r < 0)
1513 return r;
1514
1515 /* SIGRTMIN+1 causes an immediate sync. We process this very
1516 * late, so that everything else queued at this point is
1517 * really written to disk. Clients can watch
1518 * /run/systemd/journal/synced with inotify until its mtime
1519 * changes to see when a sync happened. */
1520 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1521 if (r < 0)
1522 return r;
1523
1524 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1525 if (r < 0)
1526 return r;
1527
1528 return 0;
1529 }
1530
1531 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1532 Server *s = data;
1533 int r;
1534
1535 assert(s);
1536
1537 if (streq(key, "systemd.journald.forward_to_syslog")) {
1538 r = value ? parse_boolean(value) : true;
1539 if (r < 0)
1540 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1541 else
1542 s->forward_to_syslog = r;
1543 } else if (streq(key, "systemd.journald.forward_to_kmsg")) {
1544 r = value ? parse_boolean(value) : true;
1545 if (r < 0)
1546 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1547 else
1548 s->forward_to_kmsg = r;
1549 } else if (streq(key, "systemd.journald.forward_to_console")) {
1550 r = value ? parse_boolean(value) : true;
1551 if (r < 0)
1552 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1553 else
1554 s->forward_to_console = r;
1555 } else if (streq(key, "systemd.journald.forward_to_wall")) {
1556 r = value ? parse_boolean(value) : true;
1557 if (r < 0)
1558 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1559 else
1560 s->forward_to_wall = r;
1561 } else if (streq(key, "systemd.journald.max_level_console") && value) {
1562 r = log_level_from_string(value);
1563 if (r < 0)
1564 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1565 else
1566 s->max_level_console = r;
1567 } else if (streq(key, "systemd.journald.max_level_store") && value) {
1568 r = log_level_from_string(value);
1569 if (r < 0)
1570 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1571 else
1572 s->max_level_store = r;
1573 } else if (streq(key, "systemd.journald.max_level_syslog") && value) {
1574 r = log_level_from_string(value);
1575 if (r < 0)
1576 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1577 else
1578 s->max_level_syslog = r;
1579 } else if (streq(key, "systemd.journald.max_level_kmsg") && value) {
1580 r = log_level_from_string(value);
1581 if (r < 0)
1582 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1583 else
1584 s->max_level_kmsg = r;
1585 } else if (streq(key, "systemd.journald.max_level_wall") && value) {
1586 r = log_level_from_string(value);
1587 if (r < 0)
1588 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1589 else
1590 s->max_level_wall = r;
1591 } else if (startswith(key, "systemd.journald"))
1592 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1593
1594 /* do not warn about state here, since probably systemd already did */
1595 return 0;
1596 }
1597
1598 static int server_parse_config_file(Server *s) {
1599 assert(s);
1600
1601 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1602 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1603 "Journal\0",
1604 config_item_perf_lookup, journald_gperf_lookup,
1605 false, s);
1606 }
1607
1608 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1609 Server *s = userdata;
1610
1611 assert(s);
1612
1613 server_sync(s);
1614 return 0;
1615 }
1616
1617 int server_schedule_sync(Server *s, int priority) {
1618 int r;
1619
1620 assert(s);
1621
1622 if (priority <= LOG_CRIT) {
1623 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1624 server_sync(s);
1625 return 0;
1626 }
1627
1628 if (s->sync_scheduled)
1629 return 0;
1630
1631 if (s->sync_interval_usec > 0) {
1632 usec_t when;
1633
1634 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1635 if (r < 0)
1636 return r;
1637
1638 when += s->sync_interval_usec;
1639
1640 if (!s->sync_event_source) {
1641 r = sd_event_add_time(
1642 s->event,
1643 &s->sync_event_source,
1644 CLOCK_MONOTONIC,
1645 when, 0,
1646 server_dispatch_sync, s);
1647 if (r < 0)
1648 return r;
1649
1650 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1651 } else {
1652 r = sd_event_source_set_time(s->sync_event_source, when);
1653 if (r < 0)
1654 return r;
1655
1656 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1657 }
1658 if (r < 0)
1659 return r;
1660
1661 s->sync_scheduled = true;
1662 }
1663
1664 return 0;
1665 }
1666
1667 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1668 Server *s = userdata;
1669
1670 assert(s);
1671
1672 server_cache_hostname(s);
1673 return 0;
1674 }
1675
1676 static int server_open_hostname(Server *s) {
1677 int r;
1678
1679 assert(s);
1680
1681 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1682 if (s->hostname_fd < 0)
1683 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1684
1685 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1686 if (r < 0) {
1687 /* kernels prior to 3.2 don't support polling this file. Ignore
1688 * the failure. */
1689 if (r == -EPERM) {
1690 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1691 s->hostname_fd = safe_close(s->hostname_fd);
1692 return 0;
1693 }
1694
1695 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1696 }
1697
1698 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1699 if (r < 0)
1700 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1701
1702 return 0;
1703 }
1704
1705 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1706 Server *s = userdata;
1707 int r;
1708
1709 assert(s);
1710 assert(s->notify_event_source == es);
1711 assert(s->notify_fd == fd);
1712
1713 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1714 * message on it. Either it's the watchdog event, the initial
1715 * READY=1 event or an stdout stream event. If there's nothing
1716 * to write anymore, turn our event source off. The next time
1717 * there's something to send it will be turned on again. */
1718
1719 if (!s->sent_notify_ready) {
1720 static const char p[] =
1721 "READY=1\n"
1722 "STATUS=Processing requests...";
1723 ssize_t l;
1724
1725 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1726 if (l < 0) {
1727 if (errno == EAGAIN)
1728 return 0;
1729
1730 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1731 }
1732
1733 s->sent_notify_ready = true;
1734 log_debug("Sent READY=1 notification.");
1735
1736 } else if (s->send_watchdog) {
1737
1738 static const char p[] =
1739 "WATCHDOG=1";
1740
1741 ssize_t l;
1742
1743 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1744 if (l < 0) {
1745 if (errno == EAGAIN)
1746 return 0;
1747
1748 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1749 }
1750
1751 s->send_watchdog = false;
1752 log_debug("Sent WATCHDOG=1 notification.");
1753
1754 } else if (s->stdout_streams_notify_queue)
1755 /* Dispatch one stream notification event */
1756 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1757
1758 /* Leave us enabled if there's still more to do. */
1759 if (s->send_watchdog || s->stdout_streams_notify_queue)
1760 return 0;
1761
1762 /* There was nothing to do anymore, let's turn ourselves off. */
1763 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1764 if (r < 0)
1765 return log_error_errno(r, "Failed to turn off notify event source: %m");
1766
1767 return 0;
1768 }
1769
1770 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1771 Server *s = userdata;
1772 int r;
1773
1774 assert(s);
1775
1776 s->send_watchdog = true;
1777
1778 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1779 if (r < 0)
1780 log_warning_errno(r, "Failed to turn on notify event source: %m");
1781
1782 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1783 if (r < 0)
1784 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1785
1786 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1787 if (r < 0)
1788 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1789
1790 return 0;
1791 }
1792
1793 static int server_connect_notify(Server *s) {
1794 union sockaddr_union sa = {
1795 .un.sun_family = AF_UNIX,
1796 };
1797 const char *e;
1798 int r;
1799
1800 assert(s);
1801 assert(s->notify_fd < 0);
1802 assert(!s->notify_event_source);
1803
1804 /*
1805 So here's the problem: we'd like to send notification
1806 messages to PID 1, but we cannot do that via sd_notify(),
1807 since that's synchronous, and we might end up blocking on
1808 it. Specifically: given that PID 1 might block on
1809 dbus-daemon during IPC, and dbus-daemon is logging to us,
1810 and might hence block on us, we might end up in a deadlock
1811 if we block on sending PID 1 notification messages — by
1812 generating a full blocking circle. To avoid this, let's
1813 create a non-blocking socket, and connect it to the
1814 notification socket, and then wait for POLLOUT before we
1815 send anything. This should efficiently avoid any deadlocks,
1816 as we'll never block on PID 1, hence PID 1 can safely block
1817 on dbus-daemon which can safely block on us again.
1818
1819 Don't think that this issue is real? It is, see:
1820 https://github.com/systemd/systemd/issues/1505
1821 */
1822
1823 e = getenv("NOTIFY_SOCKET");
1824 if (!e)
1825 return 0;
1826
1827 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1828 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1829 return -EINVAL;
1830 }
1831
1832 if (strlen(e) > sizeof(sa.un.sun_path)) {
1833 log_error("NOTIFY_SOCKET path too long: %s", e);
1834 return -EINVAL;
1835 }
1836
1837 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1838 if (s->notify_fd < 0)
1839 return log_error_errno(errno, "Failed to create notify socket: %m");
1840
1841 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1842
1843 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1844 if (sa.un.sun_path[0] == '@')
1845 sa.un.sun_path[0] = 0;
1846
1847 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1848 if (r < 0)
1849 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1850
1851 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1852 if (r < 0)
1853 return log_error_errno(r, "Failed to watch notification socket: %m");
1854
1855 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1856 s->send_watchdog = true;
1857
1858 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1859 if (r < 0)
1860 return log_error_errno(r, "Failed to add watchdog time event: %m");
1861 }
1862
1863 /* This should fire pretty soon, which we'll use to send the
1864 * READY=1 event. */
1865
1866 return 0;
1867 }
1868
1869 int server_init(Server *s) {
1870 _cleanup_fdset_free_ FDSet *fds = NULL;
1871 int n, r, fd;
1872 bool no_sockets;
1873
1874 assert(s);
1875
1876 zero(*s);
1877 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1878 s->compress = true;
1879 s->seal = true;
1880
1881 s->watchdog_usec = USEC_INFINITY;
1882
1883 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1884 s->sync_scheduled = false;
1885
1886 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1887 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1888
1889 s->forward_to_wall = true;
1890
1891 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1892
1893 s->max_level_store = LOG_DEBUG;
1894 s->max_level_syslog = LOG_DEBUG;
1895 s->max_level_kmsg = LOG_NOTICE;
1896 s->max_level_console = LOG_INFO;
1897 s->max_level_wall = LOG_EMERG;
1898
1899 journal_reset_metrics(&s->system_storage.metrics);
1900 journal_reset_metrics(&s->runtime_storage.metrics);
1901
1902 server_parse_config_file(s);
1903 parse_proc_cmdline(parse_proc_cmdline_item, s);
1904
1905 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1906 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1907 s->rate_limit_interval, s->rate_limit_burst);
1908 s->rate_limit_interval = s->rate_limit_burst = 0;
1909 }
1910
1911 (void) mkdir_p("/run/systemd/journal", 0755);
1912
1913 s->user_journals = ordered_hashmap_new(NULL);
1914 if (!s->user_journals)
1915 return log_oom();
1916
1917 s->mmap = mmap_cache_new();
1918 if (!s->mmap)
1919 return log_oom();
1920
1921 s->deferred_closes = set_new(NULL);
1922 if (!s->deferred_closes)
1923 return log_oom();
1924
1925 r = sd_event_default(&s->event);
1926 if (r < 0)
1927 return log_error_errno(r, "Failed to create event loop: %m");
1928
1929 n = sd_listen_fds(true);
1930 if (n < 0)
1931 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1932
1933 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1934
1935 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1936
1937 if (s->native_fd >= 0) {
1938 log_error("Too many native sockets passed.");
1939 return -EINVAL;
1940 }
1941
1942 s->native_fd = fd;
1943
1944 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1945
1946 if (s->stdout_fd >= 0) {
1947 log_error("Too many stdout sockets passed.");
1948 return -EINVAL;
1949 }
1950
1951 s->stdout_fd = fd;
1952
1953 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1954 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1955
1956 if (s->syslog_fd >= 0) {
1957 log_error("Too many /dev/log sockets passed.");
1958 return -EINVAL;
1959 }
1960
1961 s->syslog_fd = fd;
1962
1963 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1964
1965 if (s->audit_fd >= 0) {
1966 log_error("Too many audit sockets passed.");
1967 return -EINVAL;
1968 }
1969
1970 s->audit_fd = fd;
1971
1972 } else {
1973
1974 if (!fds) {
1975 fds = fdset_new();
1976 if (!fds)
1977 return log_oom();
1978 }
1979
1980 r = fdset_put(fds, fd);
1981 if (r < 0)
1982 return log_oom();
1983 }
1984 }
1985
1986 /* Try to restore streams, but don't bother if this fails */
1987 (void) server_restore_streams(s, fds);
1988
1989 if (fdset_size(fds) > 0) {
1990 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1991 fds = fdset_free(fds);
1992 }
1993
1994 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1995
1996 /* always open stdout, syslog, native, and kmsg sockets */
1997
1998 /* systemd-journald.socket: /run/systemd/journal/stdout */
1999 r = server_open_stdout_socket(s);
2000 if (r < 0)
2001 return r;
2002
2003 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2004 r = server_open_syslog_socket(s);
2005 if (r < 0)
2006 return r;
2007
2008 /* systemd-journald.socket: /run/systemd/journal/socket */
2009 r = server_open_native_socket(s);
2010 if (r < 0)
2011 return r;
2012
2013 /* /dev/ksmg */
2014 r = server_open_dev_kmsg(s);
2015 if (r < 0)
2016 return r;
2017
2018 /* Unless we got *some* sockets and not audit, open audit socket */
2019 if (s->audit_fd >= 0 || no_sockets) {
2020 r = server_open_audit(s);
2021 if (r < 0)
2022 return r;
2023 }
2024
2025 r = server_open_kernel_seqnum(s);
2026 if (r < 0)
2027 return r;
2028
2029 r = server_open_hostname(s);
2030 if (r < 0)
2031 return r;
2032
2033 r = setup_signals(s);
2034 if (r < 0)
2035 return r;
2036
2037 s->udev = udev_new();
2038 if (!s->udev)
2039 return -ENOMEM;
2040
2041 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2042 if (!s->rate_limit)
2043 return -ENOMEM;
2044
2045 r = cg_get_root_path(&s->cgroup_root);
2046 if (r < 0)
2047 return r;
2048
2049 server_cache_hostname(s);
2050 server_cache_boot_id(s);
2051 server_cache_machine_id(s);
2052
2053 s->runtime_storage.name = "Runtime journal";
2054 s->system_storage.name = "System journal";
2055
2056 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL);
2057 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL);
2058 if (!s->runtime_storage.path || !s->system_storage.path)
2059 return -ENOMEM;
2060
2061 (void) server_connect_notify(s);
2062
2063 return system_journal_open(s, false);
2064 }
2065
2066 void server_maybe_append_tags(Server *s) {
2067 #ifdef HAVE_GCRYPT
2068 JournalFile *f;
2069 Iterator i;
2070 usec_t n;
2071
2072 n = now(CLOCK_REALTIME);
2073
2074 if (s->system_journal)
2075 journal_file_maybe_append_tag(s->system_journal, n);
2076
2077 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2078 journal_file_maybe_append_tag(f, n);
2079 #endif
2080 }
2081
2082 void server_done(Server *s) {
2083 JournalFile *f;
2084 assert(s);
2085
2086 if (s->deferred_closes) {
2087 journal_file_close_set(s->deferred_closes);
2088 set_free(s->deferred_closes);
2089 }
2090
2091 while (s->stdout_streams)
2092 stdout_stream_free(s->stdout_streams);
2093
2094 if (s->system_journal)
2095 (void) journal_file_close(s->system_journal);
2096
2097 if (s->runtime_journal)
2098 (void) journal_file_close(s->runtime_journal);
2099
2100 while ((f = ordered_hashmap_steal_first(s->user_journals)))
2101 (void) journal_file_close(f);
2102
2103 ordered_hashmap_free(s->user_journals);
2104
2105 sd_event_source_unref(s->syslog_event_source);
2106 sd_event_source_unref(s->native_event_source);
2107 sd_event_source_unref(s->stdout_event_source);
2108 sd_event_source_unref(s->dev_kmsg_event_source);
2109 sd_event_source_unref(s->audit_event_source);
2110 sd_event_source_unref(s->sync_event_source);
2111 sd_event_source_unref(s->sigusr1_event_source);
2112 sd_event_source_unref(s->sigusr2_event_source);
2113 sd_event_source_unref(s->sigterm_event_source);
2114 sd_event_source_unref(s->sigint_event_source);
2115 sd_event_source_unref(s->sigrtmin1_event_source);
2116 sd_event_source_unref(s->hostname_event_source);
2117 sd_event_source_unref(s->notify_event_source);
2118 sd_event_source_unref(s->watchdog_event_source);
2119 sd_event_unref(s->event);
2120
2121 safe_close(s->syslog_fd);
2122 safe_close(s->native_fd);
2123 safe_close(s->stdout_fd);
2124 safe_close(s->dev_kmsg_fd);
2125 safe_close(s->audit_fd);
2126 safe_close(s->hostname_fd);
2127 safe_close(s->notify_fd);
2128
2129 if (s->rate_limit)
2130 journal_rate_limit_free(s->rate_limit);
2131
2132 if (s->kernel_seqnum)
2133 munmap(s->kernel_seqnum, sizeof(uint64_t));
2134
2135 free(s->buffer);
2136 free(s->tty_path);
2137 free(s->cgroup_root);
2138 free(s->hostname_field);
2139
2140 if (s->mmap)
2141 mmap_cache_unref(s->mmap);
2142
2143 udev_unref(s->udev);
2144 }
2145
2146 static const char* const storage_table[_STORAGE_MAX] = {
2147 [STORAGE_AUTO] = "auto",
2148 [STORAGE_VOLATILE] = "volatile",
2149 [STORAGE_PERSISTENT] = "persistent",
2150 [STORAGE_NONE] = "none"
2151 };
2152
2153 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2154 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2155
2156 static const char* const split_mode_table[_SPLIT_MAX] = {
2157 [SPLIT_LOGIN] = "login",
2158 [SPLIT_UID] = "uid",
2159 [SPLIT_NONE] = "none",
2160 };
2161
2162 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2163 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");