]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journald: make reading /dev/kmsg optional (#6362)
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
863a5610 74#include "syslog-util.h"
d025f1e4 75
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at the same time: find_journal()
 * closes already-open user journals until fewer than this many remain before opening another. */
76#define USER_JOURNALS_MAX 1024
77
/* NOTE(review): presumably the default delay before a scheduled journal sync is carried out; the
 * place where it is consumed is not visible in this section -- confirm. */
26687bf8 78#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
/* NOTE(review): presumably the default rate limiting window and the number of messages permitted
 * within it; consumers are not visible in this section -- confirm. */
79#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80#define DEFAULT_RATE_LIMIT_BURST 1000
/* NOTE(review): presumably the default for the max_file_usec setting that write_to_journal() passes
 * to journal_file_rotate_suggested() -- confirm where it is assigned. */
e150e820 81#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 82
/* Cached disk space figures younger than this are reused by cache_space_refresh() instead of being
 * recomputed. */
8580d1f7 83#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 84
e22aa3d3
LP
/* NOTE(review): presumably a socket send buffer size in bytes (8 MiB); its consumer is not visible
 * in this section -- confirm. */
85#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
86
7a24f3bf
VC
87/* The period to insert between posting changes for coalescing */
88#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
e0ed6db9
FB
90static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
e0ed6db9
FB
94
95 assert(ret_used);
96 assert(ret_free);
97
266a4700 98 d = opendir(path);
e0ed6db9
FB
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 101 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127}
128
a0edc477
FB
129static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131}
132
57f443a6 133static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 134 JournalStorageSpace *space;
266a4700 135 JournalMetrics *metrics;
23aba343 136 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 137 usec_t ts;
e0ed6db9 138 int r;
d025f1e4 139
8580d1f7 140 assert(s);
266a4700 141
266a4700 142 metrics = &storage->metrics;
23aba343 143 space = &storage->space;
d025f1e4 144
8580d1f7 145 ts = now(CLOCK_MONOTONIC);
d025f1e4 146
3099caf2 147 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
148 return 0;
149
23aba343 150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
151 if (r < 0)
152 return r;
d025f1e4 153
23aba343
FB
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
23aba343
FB
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
8580d1f7
LP
162 return 1;
163}
164
3a19f215
FB
165static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175}
176
177
178static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 179 JournalStorage *js;
57f443a6 180 int r;
8580d1f7
LP
181
182 assert(s);
183
266a4700 184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
d025f1e4
ZJS
194}
195
cba5629e
FB
196void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
cba5629e
FB
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
57f443a6 206 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
207 return;
208
209 metrics = &storage->metrics;
23aba343 210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
2b044526 217 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
23aba343 222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235}
236
5c3bde3f 237static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 238#ifdef HAVE_ACL
5c3bde3f 239 int r;
d025f1e4 240#endif
d025f1e4
ZJS
241 assert(f);
242
d025f1e4 243#ifdef HAVE_ACL
34c10968 244 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
245 return;
246
5c3bde3f
ZJS
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
250#endif
251}
252
7a24f3bf
VC
253static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
7a24f3bf
VC
260 JournalFile **ret) {
261 int r;
e167d7fd 262 JournalFile *f;
7a24f3bf
VC
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
b58c888f 269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 270 else
5d1ce257 271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
272 if (r < 0)
273 return r;
274
e167d7fd 275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 276 if (r < 0) {
69a3a6fd 277 (void) journal_file_close(f);
7a24f3bf
VC
278 return r;
279 }
280
e167d7fd 281 *ret = f;
7a24f3bf
VC
282 return r;
283}
284
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
105bdb46
VC
289static int system_journal_open(Server *s, bool flush_requested) {
290 const char *fn;
291 int r = 0;
292
293 if (!s->system_journal &&
f78273c8
LP
294 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
295 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
296
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
299 *
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
302
303 if (s->storage == STORAGE_PERSISTENT)
304 (void) mkdir_p("/var/log/journal/", 0755);
305
266a4700 306 (void) mkdir(s->system_storage.path, 0755);
105bdb46 307
266a4700
FB
308 fn = strjoina(s->system_storage.path, "/system.journal");
309 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
310 if (r >= 0) {
311 server_add_acls(s->system_journal, 0);
57f443a6 312 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 313 patch_min_use(&s->system_storage);
105bdb46
VC
314 } else if (r < 0) {
315 if (r != -ENOENT && r != -EROFS)
316 log_warning_errno(r, "Failed to open system journal: %m");
317
318 r = 0;
319 }
929eeb54
VC
320
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
324 *
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
327 */
f78273c8
LP
328 if (!flush_requested)
329 (void) server_flush_to_var(s, true);
105bdb46
VC
330 }
331
332 if (!s->runtime_journal &&
333 (s->storage != STORAGE_NONE)) {
334
266a4700 335 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
336
337 if (s->system_journal) {
338
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
342
266a4700 343 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
344 if (r < 0) {
345 if (r != -ENOENT)
346 log_warning_errno(r, "Failed to open runtime journal: %m");
347
348 r = 0;
349 }
350
351 } else {
352
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
355
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn, 0750);
359
266a4700 360 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
361 if (r < 0)
362 return log_error_errno(r, "Failed to open runtime journal: %m");
363 }
364
365 if (s->runtime_journal) {
366 server_add_acls(s->runtime_journal, 0);
57f443a6 367 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 368 patch_min_use(&s->runtime_storage);
105bdb46
VC
369 }
370 }
371
372 return r;
373}
374
d025f1e4 375static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 376 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
377 int r;
378 JournalFile *f;
379 sd_id128_t machine;
380
381 assert(s);
382
105bdb46
VC
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
386 *
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
390 *
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s, false);
393
d025f1e4
ZJS
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
398
399 if (s->runtime_journal)
400 return s->runtime_journal;
401
61755fda 402 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
403 return s->system_journal;
404
405 r = sd_id128_get_machine(&machine);
406 if (r < 0)
407 return s->system_journal;
408
4a0b58c4 409 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
410 if (f)
411 return f;
412
de0671ee
ZJS
413 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
414 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
415 return s->system_journal;
416
43cf8388 417 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 418 /* Too many open? Then let's close one */
43cf8388 419 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 420 assert(f);
69a3a6fd 421 (void) journal_file_close(f);
d025f1e4
ZJS
422 }
423
266a4700 424 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
425 if (r < 0)
426 return s->system_journal;
427
5c3bde3f 428 server_add_acls(f, uid);
d025f1e4 429
4a0b58c4 430 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 431 if (r < 0) {
69a3a6fd 432 (void) journal_file_close(f);
d025f1e4
ZJS
433 return s->system_journal;
434 }
435
436 return f;
437}
438
ea69bd41
LP
439static int do_rotate(
440 Server *s,
441 JournalFile **f,
442 const char* name,
443 bool seal,
444 uint32_t uid) {
445
fc55baee
ZJS
446 int r;
447 assert(s);
448
449 if (!*f)
450 return -EINVAL;
451
b58c888f 452 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
453 if (r < 0)
454 if (*f)
ea69bd41 455 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 456 else
ea69bd41 457 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 458 else
5c3bde3f 459 server_add_acls(*f, uid);
2678031a 460
fc55baee
ZJS
461 return r;
462}
463
d025f1e4
ZJS
464void server_rotate(Server *s) {
465 JournalFile *f;
466 void *k;
467 Iterator i;
468 int r;
469
470 log_debug("Rotating...");
471
8580d1f7
LP
472 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
473 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 474
43cf8388 475 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 476 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 477 if (r >= 0)
43cf8388 478 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
479 else if (!f)
480 /* Old file has been closed and deallocated */
43cf8388 481 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 482 }
b58c888f
VC
483
484 /* Perform any deferred closes which aren't still offlining. */
485 SET_FOREACH(f, s->deferred_closes, i)
486 if (!journal_file_is_offlining(f)) {
487 (void) set_remove(s->deferred_closes, f);
488 (void) journal_file_close(f);
489 }
d025f1e4
ZJS
490}
491
26687bf8
OS
492void server_sync(Server *s) {
493 JournalFile *f;
26687bf8
OS
494 Iterator i;
495 int r;
496
26687bf8 497 if (s->system_journal) {
ac2e41f5 498 r = journal_file_set_offline(s->system_journal, false);
26687bf8 499 if (r < 0)
65089b82 500 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
501 }
502
65c1d46b 503 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 504 r = journal_file_set_offline(f, false);
26687bf8 505 if (r < 0)
65089b82 506 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
507 }
508
f9a810be
LP
509 if (s->sync_event_source) {
510 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
511 if (r < 0)
da927ba9 512 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 513 }
26687bf8
OS
514
515 s->sync_scheduled = false;
516}
517
3a19f215 518static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 519
63c8666b
ZJS
520 int r;
521
8580d1f7 522 assert(s);
266a4700 523 assert(storage);
8580d1f7 524
57f443a6 525 (void) cache_space_refresh(s, storage);
18e758bf
FB
526
527 if (verbose)
528 server_space_usage_message(s, storage);
8580d1f7 529
57f443a6
FB
530 r = journal_directory_vacuum(storage->path, storage->space.limit,
531 storage->metrics.n_max_files, s->max_retention_usec,
532 &s->oldest_file_usec, verbose);
63c8666b 533 if (r < 0 && r != -ENOENT)
266a4700
FB
534 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
535
a0edc477 536 cache_space_invalidate(&storage->space);
63c8666b
ZJS
537}
538
3a19f215 539int server_vacuum(Server *s, bool verbose) {
8580d1f7 540 assert(s);
d025f1e4
ZJS
541
542 log_debug("Vacuuming...");
543
544 s->oldest_file_usec = 0;
545
266a4700 546 if (s->system_journal)
3a19f215 547 do_vacuum(s, &s->system_storage, verbose);
266a4700 548 if (s->runtime_journal)
3a19f215 549 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 550
8580d1f7 551 return 0;
d025f1e4
ZJS
552}
553
0c24bb23
LP
554static void server_cache_machine_id(Server *s) {
555 sd_id128_t id;
556 int r;
557
558 assert(s);
559
560 r = sd_id128_get_machine(&id);
561 if (r < 0)
562 return;
563
564 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
565}
566
567static void server_cache_boot_id(Server *s) {
568 sd_id128_t id;
569 int r;
570
571 assert(s);
572
573 r = sd_id128_get_boot(&id);
574 if (r < 0)
575 return;
576
577 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
578}
579
580static void server_cache_hostname(Server *s) {
581 _cleanup_free_ char *t = NULL;
582 char *x;
583
584 assert(s);
585
586 t = gethostname_malloc();
587 if (!t)
588 return;
589
590 x = strappend("_HOSTNAME=", t);
591 if (!x)
592 return;
593
594 free(s->hostname_field);
595 s->hostname_field = x;
596}
597
8531ae70 598static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 599 switch(r) {
ae739cc1 600
6e1045e5
ZJS
601 case -E2BIG: /* Hit configured limit */
602 case -EFBIG: /* Hit fs limit */
603 case -EDQUOT: /* Quota limit hit */
604 case -ENOSPC: /* Disk full */
d025f1e4 605 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 606 return true;
ae739cc1 607
6e1045e5
ZJS
608 case -EIO: /* I/O error of some kind (mmap) */
609 log_warning("%s: IO error, rotating.", f->path);
610 return true;
ae739cc1 611
6e1045e5 612 case -EHOSTDOWN: /* Other machine */
d025f1e4 613 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 614 return true;
ae739cc1 615
6e1045e5 616 case -EBUSY: /* Unclean shutdown */
d025f1e4 617 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 618 return true;
ae739cc1 619
6e1045e5 620 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 621 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 622 return true;
ae739cc1 623
6e1045e5
ZJS
624 case -EBADMSG: /* Corrupted */
625 case -ENODATA: /* Truncated */
626 case -ESHUTDOWN: /* Already archived */
d025f1e4 627 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 628 return true;
ae739cc1 629
6e1045e5 630 case -EIDRM: /* Journal file has been deleted */
2678031a 631 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 632 return true;
ae739cc1
LP
633
634 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 635 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
636 return true;
637
6e1045e5 638 default:
d025f1e4 639 return false;
6e1045e5 640 }
d025f1e4
ZJS
641}
642
d07f7b9e 643static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 644 bool vacuumed = false, rotate = false;
0f972d66 645 struct dual_timestamp ts;
d025f1e4 646 JournalFile *f;
d025f1e4
ZJS
647 int r;
648
649 assert(s);
650 assert(iovec);
651 assert(n > 0);
652
0f972d66
LP
653 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
654 * the source time, and not even the time the event was originally seen, but instead simply the time we started
655 * processing it, as we want strictly linear ordering in what we write out.) */
656 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
657 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
658
7c070017
LP
659 if (ts.realtime < s->last_realtime_clock) {
660 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
661 * regular operation. However, when it does happen, then we should make sure that we start fresh files
662 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
663 * bisection works correctly. */
d025f1e4 664
7c070017
LP
665 log_debug("Time jumped backwards, rotating.");
666 rotate = true;
667 } else {
668
669 f = find_journal(s, uid);
670 if (!f)
671 return;
672
673 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
674 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
675 rotate = true;
676 }
677 }
d025f1e4 678
7c070017 679 if (rotate) {
d025f1e4 680 server_rotate(s);
3a19f215 681 server_vacuum(s, false);
d025f1e4
ZJS
682 vacuumed = true;
683
684 f = find_journal(s, uid);
685 if (!f)
686 return;
687 }
688
7c070017
LP
689 s->last_realtime_clock = ts.realtime;
690
0f972d66 691 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 692 if (r >= 0) {
d07f7b9e 693 server_schedule_sync(s, priority);
d025f1e4 694 return;
26687bf8 695 }
d025f1e4
ZJS
696
697 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 698 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
699 return;
700 }
701
702 server_rotate(s);
3a19f215 703 server_vacuum(s, false);
d025f1e4
ZJS
704
705 f = find_journal(s, uid);
706 if (!f)
707 return;
708
709 log_debug("Retrying write.");
0f972d66 710 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
711 if (r < 0)
712 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
713 else
d07f7b9e 714 server_schedule_sync(s, priority);
d025f1e4
ZJS
715}
716
4b58153d
LP
717static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
718 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
719 char *copy, ids[SD_ID128_STRING_MAX];
720 int r;
721
722 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
723 * on the cgroup path. */
724
725 r = cg_slice_to_path(slice, &slice_path);
726 if (r < 0)
727 return r;
728
729 escaped = cg_escape(unit);
730 if (!escaped)
731 return -ENOMEM;
732
605405c6 733 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
4b58153d
LP
734 if (!p)
735 return -ENOMEM;
736
737 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
738 if (r < 0)
739 return r;
740 if (r != 32)
741 return -EINVAL;
742 ids[32] = 0;
743
744 if (!id128_is_valid(ids))
745 return -EINVAL;
746
747 copy = strdup(ids);
748 if (!copy)
749 return -ENOMEM;
750
751 *ret = copy;
752 return 0;
753}
754
d025f1e4
ZJS
755static void dispatch_message_real(
756 Server *s,
757 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
758 const struct ucred *ucred,
759 const struct timeval *tv,
d025f1e4 760 const char *label, size_t label_len,
968f3196 761 const char *unit_id,
d07f7b9e 762 int priority,
b4e7bdcb
NK
763 pid_t object_pid,
764 char *cgroup) {
d025f1e4 765
968f3196 766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
968f3196 776 char *x;
d025f1e4 777 int r;
ae018d9b 778 char *t, *c;
82499507
LP
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
ae018d9b 781#ifdef HAVE_AUDIT
968f3196
ZJS
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
786
787 uint32_t audit;
788 uid_t loginuid;
789#endif
d025f1e4
ZJS
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
d473176a 794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
795
796 if (ucred) {
d025f1e4
ZJS
797 realuid = ucred->uid;
798
de0671ee 799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 800 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 801
de0671ee 802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 803 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 804
de0671ee 805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 806 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
63c372cb 810 x = strjoina("_COMM=", t);
d025f1e4 811 free(t);
968f3196 812 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
63c372cb 817 x = strjoina("_EXE=", t);
d025f1e4 818 free(t);
968f3196 819 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
820 }
821
9bdbc2e2 822 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 823 if (r >= 0) {
63c372cb 824 x = strjoina("_CMDLINE=", t);
d025f1e4 825 free(t);
3a832116
SL
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
63c372cb 831 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 832 free(t);
968f3196 833 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
834 }
835
0a20e3c1 836#ifdef HAVE_AUDIT
d025f1e4 837 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 838 if (r >= 0) {
de0671ee 839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
d025f1e4
ZJS
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 844 if (r >= 0) {
de0671ee 845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 847 }
ae018d9b 848#endif
d025f1e4 849
b4e7bdcb
NK
850 r = 0;
851 if (cgroup)
852 c = cgroup;
853 else
854 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
855
7027ff61 856 if (r >= 0) {
4b58153d 857 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
858 char *session = NULL;
859
63c372cb 860 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 861 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 862
ae018d9b
LP
863 r = cg_path_get_session(c, &t);
864 if (r >= 0) {
63c372cb 865 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 866 free(t);
d025f1e4 867 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
868 }
869
870 if (cg_path_get_owner_uid(c, &owner) >= 0) {
871 owner_valid = true;
d025f1e4 872
de0671ee 873 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 874 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 875 }
d025f1e4 876
4b58153d
LP
877 if (cg_path_get_unit(c, &raw_unit) >= 0) {
878 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
879 IOVEC_SET_STRING(iovec[n++], x);
880 } else if (unit_id && !session) {
63c372cb 881 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
882 IOVEC_SET_STRING(iovec[n++], x);
883 }
884
885 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 886 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 887 free(t);
968f3196 888 IOVEC_SET_STRING(iovec[n++], x);
19cace37 889 } else if (unit_id && session) {
63c372cb 890 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
ae018d9b 893
4b58153d
LP
894 if (cg_path_get_slice(c, &raw_slice) >= 0) {
895 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
896 IOVEC_SET_STRING(iovec[n++], x);
897 }
898
d473176a
LP
899 if (cg_path_get_user_slice(c, &t) >= 0) {
900 x = strjoina("_SYSTEMD_USER_SLICE=", t);
901 free(t);
902 IOVEC_SET_STRING(iovec[n++], x);
903 }
904
4b58153d
LP
905 if (raw_slice && raw_unit) {
906 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
907 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
908 free(t);
909 IOVEC_SET_STRING(iovec[n++], x);
910 }
911 }
912
b4e7bdcb
NK
913 if (!cgroup)
914 free(c);
2d43b190 915 } else if (unit_id) {
63c372cb 916 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 917 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 918 }
d025f1e4 919
d025f1e4 920#ifdef HAVE_SELINUX
6d395665 921 if (mac_selinux_use()) {
d682b3a7 922 if (label) {
f8294e41 923 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 924
d682b3a7
LP
925 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
926 IOVEC_SET_STRING(iovec[n++], x);
927 } else {
2ed96880 928 char *con;
d025f1e4 929
d682b3a7 930 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 931 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 932
d682b3a7
LP
933 freecon(con);
934 IOVEC_SET_STRING(iovec[n++], x);
935 }
d025f1e4
ZJS
936 }
937 }
938#endif
939 }
968f3196
ZJS
940 assert(n <= m);
941
942 if (object_pid) {
943 r = get_process_uid(object_pid, &object_uid);
944 if (r >= 0) {
de0671ee 945 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
946 IOVEC_SET_STRING(iovec[n++], o_uid);
947 }
948
949 r = get_process_gid(object_pid, &object_gid);
950 if (r >= 0) {
de0671ee 951 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
952 IOVEC_SET_STRING(iovec[n++], o_gid);
953 }
954
955 r = get_process_comm(object_pid, &t);
956 if (r >= 0) {
63c372cb 957 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
958 free(t);
959 IOVEC_SET_STRING(iovec[n++], x);
960 }
961
962 r = get_process_exe(object_pid, &t);
963 if (r >= 0) {
63c372cb 964 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
965 free(t);
966 IOVEC_SET_STRING(iovec[n++], x);
967 }
968
969 r = get_process_cmdline(object_pid, 0, false, &t);
970 if (r >= 0) {
63c372cb 971 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
972 free(t);
973 IOVEC_SET_STRING(iovec[n++], x);
974 }
975
976#ifdef HAVE_AUDIT
977 r = audit_session_from_pid(object_pid, &audit);
978 if (r >= 0) {
de0671ee 979 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
980 IOVEC_SET_STRING(iovec[n++], o_audit_session);
981 }
982
983 r = audit_loginuid_from_pid(object_pid, &loginuid);
984 if (r >= 0) {
de0671ee 985 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
986 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
987 }
988#endif
989
e9174f29 990 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 991 if (r >= 0) {
63c372cb 992 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
993 IOVEC_SET_STRING(iovec[n++], x);
994
995 r = cg_path_get_session(c, &t);
996 if (r >= 0) {
63c372cb 997 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
998 free(t);
999 IOVEC_SET_STRING(iovec[n++], x);
1000 }
1001
1002 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 1003 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
1004 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1005 }
1006
1007 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1008 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1009 free(t);
19cace37
LP
1010 IOVEC_SET_STRING(iovec[n++], x);
1011 }
1012
1013 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1014 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1015 free(t);
968f3196 1016 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1017 }
968f3196 1018
d473176a
LP
1019 if (cg_path_get_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
1025 if (cg_path_get_user_slice(c, &t) >= 0) {
1026 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1027 free(t);
1028 IOVEC_SET_STRING(iovec[n++], x);
1029 }
1030
968f3196
ZJS
1031 free(c);
1032 }
1033 }
1034 assert(n <= m);
d025f1e4
ZJS
1035
1036 if (tv) {
398a50cd 1037 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1038 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1039 }
1040
1041 /* Note that strictly speaking storing the boot id here is
1042 * redundant since the entry includes this in-line
1043 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1044 if (!isempty(s->boot_id_field))
1045 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1046
0c24bb23
LP
1047 if (!isempty(s->machine_id_field))
1048 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1049
0c24bb23
LP
1050 if (!isempty(s->hostname_field))
1051 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1052
1053 assert(n <= m);
1054
da499392 1055 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1056 /* Split up strictly by any UID */
759c945a 1057 journal_uid = realuid;
82499507 1058 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1059 /* Split up by login UIDs. We do this only if the
1060 * realuid is not root, in order not to accidentally
1061 * leak privileged information to the user that is
1062 * logged by a privileged process that is part of an
7517e174 1063 * unprivileged session. */
8a0889df 1064 journal_uid = owner;
da499392
KS
1065 else
1066 journal_uid = 0;
759c945a 1067
d07f7b9e 1068 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1069}
1070
2b044526 1071void server_driver_message(Server *s, const char *message_id, const char *format, ...) {
8a03c9ef
ZJS
1072 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1073 unsigned n = 0, m;
32917e33 1074 int r;
d025f1e4 1075 va_list ap;
b92bea5d 1076 struct ucred ucred = {};
d025f1e4
ZJS
1077
1078 assert(s);
1079 assert(format);
1080
4850d39a 1081 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1082 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1083 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1084
d025f1e4 1085 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1086 assert_cc(6 == LOG_INFO);
32917e33 1087 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1088
2b044526
ZJS
1089 if (message_id)
1090 IOVEC_SET_STRING(iovec[n++], message_id);
8a03c9ef
ZJS
1091 m = n;
1092
1093 va_start(ap, format);
32917e33
ZJS
1094 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1095 /* Error handling below */
8a03c9ef
ZJS
1096 va_end(ap);
1097
d025f1e4
ZJS
1098 ucred.pid = getpid();
1099 ucred.uid = getuid();
1100 ucred.gid = getgid();
1101
32917e33 1102 if (r >= 0)
b4e7bdcb 1103 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
8a03c9ef
ZJS
1104
1105 while (m < n)
1106 free(iovec[m++].iov_base);
32917e33
ZJS
1107
1108 if (r < 0) {
1109 /* We failed to format the message. Emit a warning instead. */
1110 char buf[LINE_MAX];
1111
1112 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1113
1114 n = 3;
1115 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec[n++], buf);
b4e7bdcb 1117 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
32917e33 1118 }
d025f1e4
ZJS
1119}
1120
1121void server_dispatch_message(
1122 Server *s,
1123 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1124 const struct ucred *ucred,
1125 const struct timeval *tv,
d025f1e4
ZJS
1126 const char *label, size_t label_len,
1127 const char *unit_id,
968f3196
ZJS
1128 int priority,
1129 pid_t object_pid) {
d025f1e4 1130
7027ff61 1131 int rl, r;
7fd1b19b 1132 _cleanup_free_ char *path = NULL;
8580d1f7 1133 uint64_t available = 0;
b4e7bdcb 1134 char *c = NULL;
d025f1e4
ZJS
1135
1136 assert(s);
1137 assert(iovec || n == 0);
1138
1139 if (n == 0)
1140 return;
1141
1142 if (LOG_PRI(priority) > s->max_level_store)
1143 return;
1144
2f5df74a
HHPF
1145 /* Stop early in case the information will not be stored
1146 * in a journal. */
1147 if (s->storage == STORAGE_NONE)
1148 return;
1149
d025f1e4
ZJS
1150 if (!ucred)
1151 goto finish;
1152
e9174f29 1153 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1154 if (r < 0)
d025f1e4
ZJS
1155 goto finish;
1156
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1159 *
1160 * So let's cut of everything past the third /, since that is
1161 * where user directories start */
1162
1163 c = strchr(path, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c) {
1167 c = strchr(c+1, '/');
1168 if (c)
1169 *c = 0;
1170 }
1171 }
1172
3a19f215 1173 (void) determine_space(s, &available, NULL);
8580d1f7 1174 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1175 if (rl == 0)
d025f1e4 1176 return;
d025f1e4
ZJS
1177
1178 /* Write a suppression message if we suppressed something */
1179 if (rl > 1)
2b044526 1180 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
8a03c9ef
ZJS
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1182 NULL);
d025f1e4
ZJS
1183
1184finish:
b4e7bdcb
NK
1185 /* restore cgroup path for logging */
1186 if (c)
1187 *c = '/';
1188 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid, path);
d025f1e4
ZJS
1189}
1190
f78273c8 1191int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
1192 sd_id128_t machine;
1193 sd_journal *j = NULL;
fbb63411
LP
1194 char ts[FORMAT_TIMESPAN_MAX];
1195 usec_t start;
1196 unsigned n = 0;
1197 int r;
d025f1e4
ZJS
1198
1199 assert(s);
1200
f78273c8 1201 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
1202 return 0;
1203
1204 if (!s->runtime_journal)
1205 return 0;
1206
f78273c8
LP
1207 if (require_flag_file && !flushed_flag_is_set())
1208 return 0;
1209
8580d1f7 1210 (void) system_journal_open(s, true);
d025f1e4
ZJS
1211
1212 if (!s->system_journal)
1213 return 0;
1214
1215 log_debug("Flushing to /var...");
1216
fbb63411
LP
1217 start = now(CLOCK_MONOTONIC);
1218
d025f1e4 1219 r = sd_id128_get_machine(&machine);
00a16861 1220 if (r < 0)
d025f1e4 1221 return r;
d025f1e4
ZJS
1222
1223 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1224 if (r < 0)
1225 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1226
93b73b06
LP
1227 sd_journal_set_data_threshold(j, 0);
1228
d025f1e4
ZJS
1229 SD_JOURNAL_FOREACH(j) {
1230 Object *o = NULL;
1231 JournalFile *f;
1232
1233 f = j->current_file;
1234 assert(f && f->current_offset > 0);
1235
fbb63411
LP
1236 n++;
1237
d025f1e4
ZJS
1238 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1239 if (r < 0) {
da927ba9 1240 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1241 goto finish;
1242 }
1243
1244 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1245 if (r >= 0)
1246 continue;
1247
1248 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1249 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1250 goto finish;
1251 }
1252
1253 server_rotate(s);
3a19f215 1254 server_vacuum(s, false);
d025f1e4 1255
253f59df
LP
1256 if (!s->system_journal) {
1257 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1258 r = -EIO;
1259 goto finish;
1260 }
1261
d025f1e4
ZJS
1262 log_debug("Retrying write.");
1263 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1264 if (r < 0) {
da927ba9 1265 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1266 goto finish;
1267 }
1268 }
1269
804ae586
LP
1270 r = 0;
1271
d025f1e4
ZJS
1272finish:
1273 journal_file_post_change(s->system_journal);
1274
804ae586 1275 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1276
1277 if (r >= 0)
c6878637 1278 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1279
763c7aa2 1280 sd_journal_close(j);
d025f1e4 1281
2b044526 1282 server_driver_message(s, NULL,
8a03c9ef
ZJS
1283 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1284 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1285 n),
1286 NULL);
fbb63411 1287
d025f1e4
ZJS
1288 return r;
1289}
1290
8531ae70 1291int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1292 Server *s = userdata;
a315ac4e
LP
1293 struct ucred *ucred = NULL;
1294 struct timeval *tv = NULL;
1295 struct cmsghdr *cmsg;
1296 char *label = NULL;
1297 size_t label_len = 0, m;
1298 struct iovec iovec;
1299 ssize_t n;
1300 int *fds = NULL, v = 0;
1301 unsigned n_fds = 0;
1302
1303 union {
1304 struct cmsghdr cmsghdr;
1305
1306 /* We use NAME_MAX space for the SELinux label
1307 * here. The kernel currently enforces no
1308 * limit, but according to suggestions from
1309 * the SELinux people this will change and it
1310 * will probably be identical to NAME_MAX. For
1311 * now we use that, but this should be updated
1312 * one day when the final limit is known. */
1313 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1314 CMSG_SPACE(sizeof(struct timeval)) +
1315 CMSG_SPACE(sizeof(int)) + /* fd */
1316 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1317 } control = {};
1318
1319 union sockaddr_union sa = {};
1320
1321 struct msghdr msghdr = {
1322 .msg_iov = &iovec,
1323 .msg_iovlen = 1,
1324 .msg_control = &control,
1325 .msg_controllen = sizeof(control),
1326 .msg_name = &sa,
1327 .msg_namelen = sizeof(sa),
1328 };
f9a810be 1329
d025f1e4 1330 assert(s);
875c2e22 1331 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1332
1333 if (revents != EPOLLIN) {
1334 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1335 return -EIO;
1336 }
1337
a315ac4e
LP
1338 /* Try to get the right size, if we can. (Not all
1339 * sockets support SIOCINQ, hence we just try, but
1340 * don't rely on it. */
1341 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1342
a315ac4e
LP
1343 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1344 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1345 (size_t) LINE_MAX,
1346 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1347
a315ac4e
LP
1348 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1349 return log_oom();
875c2e22 1350
a315ac4e
LP
1351 iovec.iov_base = s->buffer;
1352 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1353
a315ac4e
LP
1354 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1355 if (n < 0) {
1356 if (errno == EINTR || errno == EAGAIN)
1357 return 0;
875c2e22 1358
a315ac4e
LP
1359 return log_error_errno(errno, "recvmsg() failed: %m");
1360 }
875c2e22 1361
a315ac4e
LP
1362 CMSG_FOREACH(cmsg, &msghdr) {
1363
1364 if (cmsg->cmsg_level == SOL_SOCKET &&
1365 cmsg->cmsg_type == SCM_CREDENTIALS &&
1366 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1367 ucred = (struct ucred*) CMSG_DATA(cmsg);
1368 else if (cmsg->cmsg_level == SOL_SOCKET &&
1369 cmsg->cmsg_type == SCM_SECURITY) {
1370 label = (char*) CMSG_DATA(cmsg);
1371 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1372 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1373 cmsg->cmsg_type == SO_TIMESTAMP &&
1374 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1375 tv = (struct timeval*) CMSG_DATA(cmsg);
1376 else if (cmsg->cmsg_level == SOL_SOCKET &&
1377 cmsg->cmsg_type == SCM_RIGHTS) {
1378 fds = (int*) CMSG_DATA(cmsg);
1379 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1380 }
a315ac4e 1381 }
d025f1e4 1382
a315ac4e
LP
1383 /* And a trailing NUL, just in case */
1384 s->buffer[n] = 0;
1385
1386 if (fd == s->syslog_fd) {
1387 if (n > 0 && n_fds == 0)
1388 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1389 else if (n_fds > 0)
1390 log_warning("Got file descriptors via syslog socket. Ignoring.");
1391
1392 } else if (fd == s->native_fd) {
1393 if (n > 0 && n_fds == 0)
1394 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1395 else if (n == 0 && n_fds == 1)
1396 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1397 else if (n_fds > 0)
1398 log_warning("Got too many file descriptors via native socket. Ignoring.");
1399
1400 } else {
1401 assert(fd == s->audit_fd);
1402
1403 if (n > 0 && n_fds == 0)
1404 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1405 else if (n_fds > 0)
1406 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1407 }
a315ac4e
LP
1408
1409 close_many(fds, n_fds);
1410 return 0;
f9a810be 1411}
d025f1e4 1412
f9a810be
LP
1413static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1414 Server *s = userdata;
33d52ab9 1415 int r;
d025f1e4 1416
f9a810be 1417 assert(s);
d025f1e4 1418
94b65516 1419 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1420
f78273c8 1421 (void) server_flush_to_var(s, false);
f9a810be 1422 server_sync(s);
3a19f215 1423 server_vacuum(s, false);
d025f1e4 1424
33d52ab9
LP
1425 r = touch("/run/systemd/journal/flushed");
1426 if (r < 0)
1427 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1428
18e758bf 1429 server_space_usage_message(s, NULL);
f9a810be
LP
1430 return 0;
1431}
d025f1e4 1432
f9a810be
LP
1433static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1434 Server *s = userdata;
33d52ab9 1435 int r;
d025f1e4 1436
f9a810be 1437 assert(s);
d025f1e4 1438
94b65516 1439 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1440 server_rotate(s);
3a19f215
FB
1441 server_vacuum(s, true);
1442
1443 if (s->system_journal)
1444 patch_min_use(&s->system_storage);
1445 if (s->runtime_journal)
1446 patch_min_use(&s->runtime_storage);
d025f1e4 1447
dbd6e31c 1448 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1449 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1450 if (r < 0)
1451 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1452
f9a810be
LP
1453 return 0;
1454}
d025f1e4 1455
f9a810be
LP
1456static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1457 Server *s = userdata;
d025f1e4 1458
f9a810be 1459 assert(s);
d025f1e4 1460
4daf54a8 1461 log_received_signal(LOG_INFO, si);
d025f1e4 1462
6203e07a 1463 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1464 return 0;
1465}
1466
94b65516
LP
1467static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1468 Server *s = userdata;
33d52ab9 1469 int r;
94b65516
LP
1470
1471 assert(s);
1472
1473 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1474
1475 server_sync(s);
1476
1477 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1478 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1479 if (r < 0)
1480 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1481
1482 return 0;
1483}
1484
f9a810be 1485static int setup_signals(Server *s) {
f9a810be 1486 int r;
d025f1e4
ZJS
1487
1488 assert(s);
1489
9bab3b65 1490 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1491
151b9b96 1492 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1493 if (r < 0)
1494 return r;
1495
151b9b96 1496 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1497 if (r < 0)
1498 return r;
d025f1e4 1499
151b9b96 1500 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1501 if (r < 0)
1502 return r;
d025f1e4 1503
b374689c
LP
1504 /* Let's process SIGTERM late, so that we flush all queued
1505 * messages to disk before we exit */
1506 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1507 if (r < 0)
1508 return r;
1509
1510 /* When journald is invoked on the terminal (when debugging),
1511 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1512 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1513 if (r < 0)
1514 return r;
d025f1e4 1515
b374689c
LP
1516 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1517 if (r < 0)
1518 return r;
1519
94b65516
LP
1520 /* SIGRTMIN+1 causes an immediate sync. We process this very
1521 * late, so that everything else queued at this point is
1522 * really written to disk. Clients can watch
1523 * /run/systemd/journal/synced with inotify until its mtime
1524 * changes to see when a sync happened. */
1525 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1526 if (r < 0)
1527 return r;
1528
1529 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1530 if (r < 0)
1531 return r;
1532
d025f1e4
ZJS
1533 return 0;
1534}
1535
5707ecf3
ZJS
1536static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1537 Server *s = data;
74df0fca 1538 int r;
d025f1e4 1539
5707ecf3 1540 assert(s);
d025f1e4 1541
1d84ad94
LP
1542 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1543
5707ecf3 1544 r = value ? parse_boolean(value) : true;
d581d9d9 1545 if (r < 0)
5707ecf3
ZJS
1546 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1547 else
1548 s->forward_to_syslog = r;
1d84ad94
LP
1549
1550 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1551
5707ecf3
ZJS
1552 r = value ? parse_boolean(value) : true;
1553 if (r < 0)
1554 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1555 else
1556 s->forward_to_kmsg = r;
1d84ad94
LP
1557
1558 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1559
5707ecf3
ZJS
1560 r = value ? parse_boolean(value) : true;
1561 if (r < 0)
1562 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1563 else
1564 s->forward_to_console = r;
1d84ad94
LP
1565
1566 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1567
5707ecf3
ZJS
1568 r = value ? parse_boolean(value) : true;
1569 if (r < 0)
1570 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1571 else
1572 s->forward_to_wall = r;
1d84ad94
LP
1573
1574 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1575
1576 if (proc_cmdline_value_missing(key, value))
1577 return 0;
1578
5707ecf3
ZJS
1579 r = log_level_from_string(value);
1580 if (r < 0)
1581 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1582 else
1583 s->max_level_console = r;
1d84ad94
LP
1584
1585 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1586
1587 if (proc_cmdline_value_missing(key, value))
1588 return 0;
1589
5707ecf3
ZJS
1590 r = log_level_from_string(value);
1591 if (r < 0)
1592 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1593 else
1594 s->max_level_store = r;
1d84ad94
LP
1595
1596 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1597
1598 if (proc_cmdline_value_missing(key, value))
1599 return 0;
1600
5707ecf3
ZJS
1601 r = log_level_from_string(value);
1602 if (r < 0)
1603 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1604 else
1605 s->max_level_syslog = r;
1d84ad94
LP
1606
1607 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1608
1609 if (proc_cmdline_value_missing(key, value))
1610 return 0;
1611
5707ecf3
ZJS
1612 r = log_level_from_string(value);
1613 if (r < 0)
1614 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1615 else
1616 s->max_level_kmsg = r;
1d84ad94
LP
1617
1618 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1619
1620 if (proc_cmdline_value_missing(key, value))
1621 return 0;
1622
5707ecf3
ZJS
1623 r = log_level_from_string(value);
1624 if (r < 0)
1625 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1626 else
1627 s->max_level_wall = r;
1d84ad94 1628
5707ecf3
ZJS
1629 } else if (startswith(key, "systemd.journald"))
1630 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1631
804ae586 1632 /* do not warn about state here, since probably systemd already did */
db91ea32 1633 return 0;
d025f1e4
ZJS
1634}
1635
1636static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1637 assert(s);
1638
43688c49 1639 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1640 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1641 "Journal\0",
1642 config_item_perf_lookup, journald_gperf_lookup,
1643 false, s);
d025f1e4
ZJS
1644}
1645
f9a810be
LP
1646static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1647 Server *s = userdata;
26687bf8
OS
1648
1649 assert(s);
1650
f9a810be 1651 server_sync(s);
26687bf8
OS
1652 return 0;
1653}
1654
d07f7b9e 1655int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1656 int r;
1657
26687bf8
OS
1658 assert(s);
1659
d07f7b9e
LP
1660 if (priority <= LOG_CRIT) {
1661 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1662 server_sync(s);
1663 return 0;
1664 }
1665
26687bf8
OS
1666 if (s->sync_scheduled)
1667 return 0;
1668
f9a810be
LP
1669 if (s->sync_interval_usec > 0) {
1670 usec_t when;
ca267016 1671
6a0f1f6d 1672 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1673 if (r < 0)
1674 return r;
26687bf8 1675
f9a810be
LP
1676 when += s->sync_interval_usec;
1677
1678 if (!s->sync_event_source) {
6a0f1f6d
LP
1679 r = sd_event_add_time(
1680 s->event,
1681 &s->sync_event_source,
1682 CLOCK_MONOTONIC,
1683 when, 0,
1684 server_dispatch_sync, s);
f9a810be
LP
1685 if (r < 0)
1686 return r;
1687
1688 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1689 } else {
1690 r = sd_event_source_set_time(s->sync_event_source, when);
1691 if (r < 0)
1692 return r;
1693
1694 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1695 }
26687bf8 1696 if (r < 0)
f9a810be 1697 return r;
26687bf8 1698
f9a810be
LP
1699 s->sync_scheduled = true;
1700 }
26687bf8
OS
1701
1702 return 0;
1703}
1704
0c24bb23
LP
1705static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1706 Server *s = userdata;
1707
1708 assert(s);
1709
1710 server_cache_hostname(s);
1711 return 0;
1712}
1713
1714static int server_open_hostname(Server *s) {
1715 int r;
1716
1717 assert(s);
1718
1719 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1720 if (s->hostname_fd < 0)
1721 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1722
151b9b96 1723 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1724 if (r < 0) {
28def94c
DR
1725 /* kernels prior to 3.2 don't support polling this file. Ignore
1726 * the failure. */
1727 if (r == -EPERM) {
e53fc357 1728 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1729 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1730 return 0;
1731 }
1732
23bbb0de 1733 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1734 }
1735
1736 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1737 if (r < 0)
1738 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1739
1740 return 0;
1741}
1742
e22aa3d3
LP
1743static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1744 Server *s = userdata;
1745 int r;
1746
1747 assert(s);
1748 assert(s->notify_event_source == es);
1749 assert(s->notify_fd == fd);
1750
e22aa3d3 1751 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1752 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1753 * READY=1 event or an stdout stream event. If there's nothing
1754 * to write anymore, turn our event source off. The next time
1755 * there's something to send it will be turned on again. */
e22aa3d3
LP
1756
1757 if (!s->sent_notify_ready) {
1758 static const char p[] =
1759 "READY=1\n"
1760 "STATUS=Processing requests...";
1761 ssize_t l;
1762
1763 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1764 if (l < 0) {
1765 if (errno == EAGAIN)
1766 return 0;
1767
1768 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1769 }
1770
1771 s->sent_notify_ready = true;
1772 log_debug("Sent READY=1 notification.");
1773
119e9655
LP
1774 } else if (s->send_watchdog) {
1775
1776 static const char p[] =
1777 "WATCHDOG=1";
1778
1779 ssize_t l;
1780
1781 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1782 if (l < 0) {
1783 if (errno == EAGAIN)
1784 return 0;
1785
1786 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1787 }
1788
1789 s->send_watchdog = false;
1790 log_debug("Sent WATCHDOG=1 notification.");
1791
e22aa3d3
LP
1792 } else if (s->stdout_streams_notify_queue)
1793 /* Dispatch one stream notification event */
1794 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1795
61233823 1796 /* Leave us enabled if there's still more to do. */
119e9655 1797 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1798 return 0;
1799
1800 /* There was nothing to do anymore, let's turn ourselves off. */
1801 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1802 if (r < 0)
1803 return log_error_errno(r, "Failed to turn off notify event source: %m");
1804
1805 return 0;
1806}
1807
119e9655
LP
1808static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1809 Server *s = userdata;
1810 int r;
1811
1812 assert(s);
1813
1814 s->send_watchdog = true;
1815
1816 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1817 if (r < 0)
1818 log_warning_errno(r, "Failed to turn on notify event source: %m");
1819
1820 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1821 if (r < 0)
1822 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1823
1824 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1825 if (r < 0)
1826 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1827
1828 return 0;
1829}
1830
e22aa3d3
LP
1831static int server_connect_notify(Server *s) {
1832 union sockaddr_union sa = {
1833 .un.sun_family = AF_UNIX,
1834 };
1835 const char *e;
1836 int r;
1837
1838 assert(s);
1839 assert(s->notify_fd < 0);
1840 assert(!s->notify_event_source);
1841
1842 /*
1843 So here's the problem: we'd like to send notification
1844 messages to PID 1, but we cannot do that via sd_notify(),
1845 since that's synchronous, and we might end up blocking on
1846 it. Specifically: given that PID 1 might block on
1847 dbus-daemon during IPC, and dbus-daemon is logging to us,
1848 and might hence block on us, we might end up in a deadlock
ccddd104 1849 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1850 generating a full blocking circle. To avoid this, let's
1851 create a non-blocking socket, and connect it to the
1852 notification socket, and then wait for POLLOUT before we
1853 send anything. This should efficiently avoid any deadlocks,
1854 as we'll never block on PID 1, hence PID 1 can safely block
1855 on dbus-daemon which can safely block on us again.
1856
1857 Don't think that this issue is real? It is, see:
1858 https://github.com/systemd/systemd/issues/1505
1859 */
1860
1861 e = getenv("NOTIFY_SOCKET");
1862 if (!e)
1863 return 0;
1864
1865 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1866 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1867 return -EINVAL;
1868 }
1869
1870 if (strlen(e) > sizeof(sa.un.sun_path)) {
1871 log_error("NOTIFY_SOCKET path too long: %s", e);
1872 return -EINVAL;
1873 }
1874
1875 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1876 if (s->notify_fd < 0)
1877 return log_error_errno(errno, "Failed to create notify socket: %m");
1878
1879 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1880
1881 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1882 if (sa.un.sun_path[0] == '@')
1883 sa.un.sun_path[0] = 0;
1884
fc2fffe7 1885 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1886 if (r < 0)
1887 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1888
1889 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1890 if (r < 0)
1891 return log_error_errno(r, "Failed to watch notification socket: %m");
1892
119e9655
LP
1893 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1894 s->send_watchdog = true;
1895
4de2402b 1896 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1897 if (r < 0)
1898 return log_error_errno(r, "Failed to add watchdog time event: %m");
1899 }
1900
e22aa3d3
LP
1901 /* This should fire pretty soon, which we'll use to send the
1902 * READY=1 event. */
1903
1904 return 0;
1905}
1906
d025f1e4 1907int server_init(Server *s) {
13790add 1908 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1909 int n, r, fd;
7d18d348 1910 bool no_sockets;
d025f1e4
ZJS
1911
1912 assert(s);
1913
1914 zero(*s);
e22aa3d3 1915 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1916 s->compress = true;
1917 s->seal = true;
b2392ff3 1918 s->read_kmsg = true;
d025f1e4 1919
119e9655
LP
1920 s->watchdog_usec = USEC_INFINITY;
1921
26687bf8
OS
1922 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1923 s->sync_scheduled = false;
1924
d025f1e4
ZJS
1925 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1926 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1927
40b71e89 1928 s->forward_to_wall = true;
d025f1e4 1929
e150e820
MB
1930 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1931
d025f1e4
ZJS
1932 s->max_level_store = LOG_DEBUG;
1933 s->max_level_syslog = LOG_DEBUG;
1934 s->max_level_kmsg = LOG_NOTICE;
1935 s->max_level_console = LOG_INFO;
40b71e89 1936 s->max_level_wall = LOG_EMERG;
d025f1e4 1937
266a4700
FB
1938 journal_reset_metrics(&s->system_storage.metrics);
1939 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1940
1941 server_parse_config_file(s);
1d84ad94
LP
1942
1943 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1944 if (r < 0)
1945 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1946
d288f79f 1947 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1948 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1949 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1950 s->rate_limit_interval = s->rate_limit_burst = 0;
1951 }
d025f1e4 1952
8580d1f7 1953 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1954
43cf8388 1955 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1956 if (!s->user_journals)
1957 return log_oom();
1958
1959 s->mmap = mmap_cache_new();
1960 if (!s->mmap)
1961 return log_oom();
1962
b58c888f
VC
1963 s->deferred_closes = set_new(NULL);
1964 if (!s->deferred_closes)
1965 return log_oom();
1966
f9a810be 1967 r = sd_event_default(&s->event);
23bbb0de
MS
1968 if (r < 0)
1969 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1970
1971 n = sd_listen_fds(true);
23bbb0de
MS
1972 if (n < 0)
1973 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1974
1975 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1976
1977 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1978
1979 if (s->native_fd >= 0) {
1980 log_error("Too many native sockets passed.");
1981 return -EINVAL;
1982 }
1983
1984 s->native_fd = fd;
1985
1986 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1987
1988 if (s->stdout_fd >= 0) {
1989 log_error("Too many stdout sockets passed.");
1990 return -EINVAL;
1991 }
1992
1993 s->stdout_fd = fd;
1994
03ee5c38
LP
1995 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1996 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1997
1998 if (s->syslog_fd >= 0) {
1999 log_error("Too many /dev/log sockets passed.");
2000 return -EINVAL;
2001 }
2002
2003 s->syslog_fd = fd;
2004
875c2e22
LP
2005 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
2006
2007 if (s->audit_fd >= 0) {
2008 log_error("Too many audit sockets passed.");
2009 return -EINVAL;
2010 }
2011
2012 s->audit_fd = fd;
2013
4ec3cd73 2014 } else {
4ec3cd73 2015
13790add
LP
2016 if (!fds) {
2017 fds = fdset_new();
2018 if (!fds)
2019 return log_oom();
2020 }
4ec3cd73 2021
13790add
LP
2022 r = fdset_put(fds, fd);
2023 if (r < 0)
2024 return log_oom();
4ec3cd73 2025 }
d025f1e4
ZJS
2026 }
2027
15d91bff
ZJS
2028 /* Try to restore streams, but don't bother if this fails */
2029 (void) server_restore_streams(s, fds);
d025f1e4 2030
13790add
LP
2031 if (fdset_size(fds) > 0) {
2032 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
2033 fds = fdset_free(fds);
2034 }
2035
7d18d348
ZJS
2036 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
2037
2038 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
2039
2040 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
2041 r = server_open_stdout_socket(s);
2042 if (r < 0)
2043 return r;
2044
37b7affe 2045 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 2046 r = server_open_syslog_socket(s);
d025f1e4
ZJS
2047 if (r < 0)
2048 return r;
2049
37b7affe 2050 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 2051 r = server_open_native_socket(s);
d025f1e4
ZJS
2052 if (r < 0)
2053 return r;
2054
b2392ff3 2055 /* /dev/kmsg */
d025f1e4
ZJS
2056 r = server_open_dev_kmsg(s);
2057 if (r < 0)
2058 return r;
2059
7d18d348
ZJS
2060 /* Unless we got *some* sockets and not audit, open audit socket */
2061 if (s->audit_fd >= 0 || no_sockets) {
2062 r = server_open_audit(s);
2063 if (r < 0)
2064 return r;
2065 }
875c2e22 2066
d025f1e4
ZJS
2067 r = server_open_kernel_seqnum(s);
2068 if (r < 0)
2069 return r;
2070
0c24bb23
LP
2071 r = server_open_hostname(s);
2072 if (r < 0)
2073 return r;
2074
f9a810be 2075 r = setup_signals(s);
d025f1e4
ZJS
2076 if (r < 0)
2077 return r;
2078
2079 s->udev = udev_new();
2080 if (!s->udev)
2081 return -ENOMEM;
2082
f9a810be 2083 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2084 if (!s->rate_limit)
2085 return -ENOMEM;
2086
e9174f29
LP
2087 r = cg_get_root_path(&s->cgroup_root);
2088 if (r < 0)
2089 return r;
2090
0c24bb23
LP
2091 server_cache_hostname(s);
2092 server_cache_boot_id(s);
2093 server_cache_machine_id(s);
2094
266a4700
FB
2095 s->runtime_storage.name = "Runtime journal";
2096 s->system_storage.name = "System journal";
2097
605405c6
ZJS
2098 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2099 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
2100 if (!s->runtime_storage.path || !s->system_storage.path)
2101 return -ENOMEM;
2102
e22aa3d3
LP
2103 (void) server_connect_notify(s);
2104
804ae586 2105 return system_journal_open(s, false);
d025f1e4
ZJS
2106}
2107
2108void server_maybe_append_tags(Server *s) {
2109#ifdef HAVE_GCRYPT
2110 JournalFile *f;
2111 Iterator i;
2112 usec_t n;
2113
2114 n = now(CLOCK_REALTIME);
2115
2116 if (s->system_journal)
2117 journal_file_maybe_append_tag(s->system_journal, n);
2118
43cf8388 2119 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2120 journal_file_maybe_append_tag(f, n);
2121#endif
2122}
2123
2124void server_done(Server *s) {
2125 JournalFile *f;
2126 assert(s);
2127
b58c888f
VC
2128 if (s->deferred_closes) {
2129 journal_file_close_set(s->deferred_closes);
2130 set_free(s->deferred_closes);
2131 }
2132
d025f1e4
ZJS
2133 while (s->stdout_streams)
2134 stdout_stream_free(s->stdout_streams);
2135
2136 if (s->system_journal)
69a3a6fd 2137 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2138
2139 if (s->runtime_journal)
69a3a6fd 2140 (void) journal_file_close(s->runtime_journal);
d025f1e4 2141
43cf8388 2142 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2143 (void) journal_file_close(f);
d025f1e4 2144
43cf8388 2145 ordered_hashmap_free(s->user_journals);
d025f1e4 2146
f9a810be
LP
2147 sd_event_source_unref(s->syslog_event_source);
2148 sd_event_source_unref(s->native_event_source);
2149 sd_event_source_unref(s->stdout_event_source);
2150 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2151 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2152 sd_event_source_unref(s->sync_event_source);
2153 sd_event_source_unref(s->sigusr1_event_source);
2154 sd_event_source_unref(s->sigusr2_event_source);
2155 sd_event_source_unref(s->sigterm_event_source);
2156 sd_event_source_unref(s->sigint_event_source);
94b65516 2157 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2158 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2159 sd_event_source_unref(s->notify_event_source);
119e9655 2160 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2161 sd_event_unref(s->event);
d025f1e4 2162
03e334a1
LP
2163 safe_close(s->syslog_fd);
2164 safe_close(s->native_fd);
2165 safe_close(s->stdout_fd);
2166 safe_close(s->dev_kmsg_fd);
875c2e22 2167 safe_close(s->audit_fd);
03e334a1 2168 safe_close(s->hostname_fd);
e22aa3d3 2169 safe_close(s->notify_fd);
0c24bb23 2170
d025f1e4
ZJS
2171 if (s->rate_limit)
2172 journal_rate_limit_free(s->rate_limit);
2173
2174 if (s->kernel_seqnum)
2175 munmap(s->kernel_seqnum, sizeof(uint64_t));
2176
2177 free(s->buffer);
2178 free(s->tty_path);
e9174f29 2179 free(s->cgroup_root);
99d0966e 2180 free(s->hostname_field);
c6e9e16f
ZJS
2181 free(s->runtime_storage.path);
2182 free(s->system_storage.path);
d025f1e4
ZJS
2183
2184 if (s->mmap)
2185 mmap_cache_unref(s->mmap);
2186
3e044c49 2187 udev_unref(s->udev);
d025f1e4 2188}
8580d1f7
LP
2189
2190static const char* const storage_table[_STORAGE_MAX] = {
2191 [STORAGE_AUTO] = "auto",
2192 [STORAGE_VOLATILE] = "volatile",
2193 [STORAGE_PERSISTENT] = "persistent",
2194 [STORAGE_NONE] = "none"
2195};
2196
2197DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2198DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2199
2200static const char* const split_mode_table[_SPLIT_MAX] = {
2201 [SPLIT_LOGIN] = "login",
2202 [SPLIT_UID] = "uid",
2203 [SPLIT_NONE] = "none",
2204};
2205
2206DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2207DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");