]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journal-file: refactor journal_file_open_reliably()
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
d025f1e4 2
349cc4a5 3#if HAVE_SELINUX
24882e06
LP
4#include <selinux/selinux.h>
5#endif
8580d1f7
LP
6#include <sys/ioctl.h>
7#include <sys/mman.h>
8#include <sys/signalfd.h>
9#include <sys/statvfs.h>
07630cea 10#include <linux/sockios.h>
24882e06 11
8580d1f7 12#include "sd-daemon.h"
74df0fca
LP
13#include "sd-journal.h"
14#include "sd-messages.h"
8580d1f7
LP
15
16#include "acl-util.h"
b5efdb8a 17#include "alloc-util.h"
430f0182 18#include "audit-util.h"
d025f1e4 19#include "cgroup-util.h"
d025f1e4 20#include "conf-parser.h"
a0956174 21#include "dirent-util.h"
0dec689b 22#include "extract-word.h"
3ffd4af2 23#include "fd-util.h"
33d52ab9 24#include "fileio.h"
f97b34a6 25#include "format-util.h"
f4f15635 26#include "fs-util.h"
8580d1f7 27#include "hashmap.h"
958b66ea 28#include "hostname-util.h"
4b58153d 29#include "id128-util.h"
afc5dbf3 30#include "io-util.h"
8580d1f7
LP
31#include "journal-authenticate.h"
32#include "journal-file.h"
d025f1e4
ZJS
33#include "journal-internal.h"
34#include "journal-vacuum.h"
8580d1f7 35#include "journald-audit.h"
22e3a02b 36#include "journald-context.h"
d025f1e4 37#include "journald-kmsg.h"
d025f1e4 38#include "journald-native.h"
8580d1f7 39#include "journald-rate-limit.h"
3ffd4af2 40#include "journald-server.h"
8580d1f7
LP
41#include "journald-stream.h"
42#include "journald-syslog.h"
4b58153d 43#include "log.h"
07630cea
LP
44#include "missing.h"
45#include "mkdir.h"
6bedfcbb 46#include "parse-util.h"
4e731273 47#include "proc-cmdline.h"
07630cea
LP
48#include "process-util.h"
49#include "rm-rf.h"
50#include "selinux-util.h"
51#include "signal-util.h"
52#include "socket-util.h"
32917e33 53#include "stdio-util.h"
8b43440b 54#include "string-table.h"
07630cea 55#include "string-util.h"
863a5610 56#include "syslog-util.h"
22e3a02b 57#include "user-util.h"
d025f1e4 58
d025f1e4
ZJS
59#define USER_JOURNALS_MAX 1024
60
26687bf8 61#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696 62#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
3de8ff5a 63#define DEFAULT_RATE_LIMIT_BURST 10000
e150e820 64#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 65
8580d1f7 66#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 67
e22aa3d3
LP
68#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
69
7a24f3bf
VC
70/* The period to insert between posting changes for coalescing */
71#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
72
ec20fe5f
LP
73/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
74 * for a bit of additional metadata. */
75#define DEFAULT_LINE_MAX (48*1024)
76
e0ed6db9
FB
77static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
78 _cleanup_closedir_ DIR *d = NULL;
79 struct dirent *de;
80 struct statvfs ss;
e0ed6db9
FB
81
82 assert(ret_used);
83 assert(ret_free);
84
266a4700 85 d = opendir(path);
e0ed6db9
FB
86 if (!d)
87 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 88 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
89
90 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 91 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
92
93 *ret_free = ss.f_bsize * ss.f_bavail;
94 *ret_used = 0;
95 FOREACH_DIRENT_ALL(de, d, break) {
96 struct stat st;
97
98 if (!endswith(de->d_name, ".journal") &&
99 !endswith(de->d_name, ".journal~"))
100 continue;
101
102 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 103 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
104 continue;
105 }
106
107 if (!S_ISREG(st.st_mode))
108 continue;
109
110 *ret_used += (uint64_t) st.st_blocks * 512UL;
111 }
112
113 return 0;
114}
115
a0edc477 116static void cache_space_invalidate(JournalStorageSpace *space) {
67319249 117 zero(*space);
a0edc477
FB
118}
119
57f443a6 120static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 121 JournalStorageSpace *space;
266a4700 122 JournalMetrics *metrics;
23aba343 123 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 124 usec_t ts;
e0ed6db9 125 int r;
d025f1e4 126
8580d1f7 127 assert(s);
266a4700 128
266a4700 129 metrics = &storage->metrics;
23aba343 130 space = &storage->space;
d025f1e4 131
8580d1f7 132 ts = now(CLOCK_MONOTONIC);
d025f1e4 133
3099caf2 134 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
135 return 0;
136
23aba343 137 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
138 if (r < 0)
139 return r;
d025f1e4 140
23aba343
FB
141 space->vfs_used = vfs_used;
142 space->vfs_available = vfs_avail;
143
144 avail = LESS_BY(vfs_avail, metrics->keep_free);
145
23aba343
FB
146 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
147 space->available = LESS_BY(space->limit, vfs_used);
148 space->timestamp = ts;
8580d1f7
LP
149 return 1;
150}
151
3a19f215
FB
152static void patch_min_use(JournalStorage *storage) {
153 assert(storage);
154
155 /* Let's bump the min_use limit to the current usage on disk. We do
156 * this when starting up and first opening the journal files. This way
157 * sudden spikes in disk usage will not cause journald to vacuum files
158 * without bounds. Note that this means that only a restart of journald
159 * will make it reset this value. */
160
161 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
162}
163
3a19f215 164static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 165 JournalStorage *js;
57f443a6 166 int r;
8580d1f7
LP
167
168 assert(s);
169
266a4700 170 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
171
172 r = cache_space_refresh(s, js);
173 if (r >= 0) {
174 if (available)
175 *available = js->space.available;
176 if (limit)
177 *limit = js->space.limit;
178 }
179 return r;
d025f1e4
ZJS
180}
181
cba5629e
FB
182void server_space_usage_message(Server *s, JournalStorage *storage) {
183 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
184 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
185 JournalMetrics *metrics;
cba5629e
FB
186
187 assert(s);
188
189 if (!storage)
190 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
191
57f443a6 192 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
193 return;
194
195 metrics = &storage->metrics;
23aba343 196 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
197 format_bytes(fb2, sizeof(fb2), metrics->max_use);
198 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 199 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
200 format_bytes(fb5, sizeof(fb5), storage->space.limit);
201 format_bytes(fb6, sizeof(fb6), storage->space.available);
202
13181942
LP
203 server_driver_message(s, 0,
204 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
205 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
206 storage->name, storage->path, fb1, fb5, fb6),
207 "JOURNAL_NAME=%s", storage->name,
208 "JOURNAL_PATH=%s", storage->path,
23aba343 209 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
210 "CURRENT_USE_PRETTY=%s", fb1,
211 "MAX_USE=%"PRIu64, metrics->max_use,
212 "MAX_USE_PRETTY=%s", fb2,
213 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
214 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 215 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
216 "DISK_AVAILABLE_PRETTY=%s", fb4,
217 "LIMIT=%"PRIu64, storage->space.limit,
218 "LIMIT_PRETTY=%s", fb5,
219 "AVAILABLE=%"PRIu64, storage->space.available,
220 "AVAILABLE_PRETTY=%s", fb6,
221 NULL);
222}
223
2fce06b0
LP
224static bool uid_for_system_journal(uid_t uid) {
225
226 /* Returns true if the specified UID shall get its data stored in the system journal*/
227
228 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
229}
230
5c3bde3f 231static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 232#if HAVE_ACL
5c3bde3f 233 int r;
d025f1e4 234#endif
d025f1e4
ZJS
235 assert(f);
236
349cc4a5 237#if HAVE_ACL
2fce06b0 238 if (uid_for_system_journal(uid))
d025f1e4
ZJS
239 return;
240
5c3bde3f
ZJS
241 r = add_acls_for_user(f->fd, uid);
242 if (r < 0)
243 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
244#endif
245}
246
7a24f3bf
VC
247static int open_journal(
248 Server *s,
249 bool reliably,
250 const char *fname,
251 int flags,
252 bool seal,
253 JournalMetrics *metrics,
7a24f3bf
VC
254 JournalFile **ret) {
255 int r;
e167d7fd 256 JournalFile *f;
7a24f3bf
VC
257
258 assert(s);
259 assert(fname);
260 assert(ret);
261
262 if (reliably)
1b7cf0e5
AG
263 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
264 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 265 else
1b7cf0e5
AG
266 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
267 metrics, s->mmap, s->deferred_closes, NULL, &f);
268
7a24f3bf
VC
269 if (r < 0)
270 return r;
271
e167d7fd 272 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 273 if (r < 0) {
69a3a6fd 274 (void) journal_file_close(f);
7a24f3bf
VC
275 return r;
276 }
277
e167d7fd 278 *ret = f;
7a24f3bf
VC
279 return r;
280}
281
/* Checks whether the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
285
105bdb46
VC
286static int system_journal_open(Server *s, bool flush_requested) {
287 const char *fn;
288 int r = 0;
289
290 if (!s->system_journal &&
f78273c8
LP
291 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
292 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
293
294 /* If in auto mode: first try to create the machine
295 * path, but not the prefix.
296 *
297 * If in persistent mode: create /var/log/journal and
298 * the machine path */
299
300 if (s->storage == STORAGE_PERSISTENT)
301 (void) mkdir_p("/var/log/journal/", 0755);
302
266a4700 303 (void) mkdir(s->system_storage.path, 0755);
105bdb46 304
266a4700
FB
305 fn = strjoina(s->system_storage.path, "/system.journal");
306 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
307 if (r >= 0) {
308 server_add_acls(s->system_journal, 0);
57f443a6 309 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 310 patch_min_use(&s->system_storage);
29bfb683 311 } else {
4c701096 312 if (!IN_SET(r, -ENOENT, -EROFS))
105bdb46
VC
313 log_warning_errno(r, "Failed to open system journal: %m");
314
315 r = 0;
316 }
929eeb54
VC
317
318 /* If the runtime journal is open, and we're post-flush, we're
319 * recovering from a failed system journal rotate (ENOSPC)
320 * for which the runtime journal was reopened.
321 *
322 * Perform an implicit flush to var, leaving the runtime
323 * journal closed, now that the system journal is back.
324 */
f78273c8
LP
325 if (!flush_requested)
326 (void) server_flush_to_var(s, true);
105bdb46
VC
327 }
328
329 if (!s->runtime_journal &&
330 (s->storage != STORAGE_NONE)) {
331
266a4700 332 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
333
334 if (s->system_journal) {
335
336 /* Try to open the runtime journal, but only
337 * if it already exists, so that we can flush
338 * it into the system journal */
339
266a4700 340 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
341 if (r < 0) {
342 if (r != -ENOENT)
343 log_warning_errno(r, "Failed to open runtime journal: %m");
344
345 r = 0;
346 }
347
348 } else {
349
350 /* OK, we really need the runtime journal, so create
351 * it if necessary. */
352
353 (void) mkdir("/run/log", 0755);
354 (void) mkdir("/run/log/journal", 0755);
355 (void) mkdir_parents(fn, 0750);
356
266a4700 357 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
358 if (r < 0)
359 return log_error_errno(r, "Failed to open runtime journal: %m");
360 }
361
362 if (s->runtime_journal) {
363 server_add_acls(s->runtime_journal, 0);
57f443a6 364 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 365 patch_min_use(&s->runtime_storage);
105bdb46
VC
366 }
367 }
368
369 return r;
370}
371
d025f1e4 372static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 373 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
374 int r;
375 JournalFile *f;
376 sd_id128_t machine;
377
378 assert(s);
379
105bdb46
VC
380 /* A rotate that fails to create the new journal (ENOSPC) leaves the
381 * rotated journal as NULL. Unless we revisit opening, even after
382 * space is made available we'll continue to return NULL indefinitely.
383 *
384 * system_journal_open() is a noop if the journals are already open, so
385 * we can just call it here to recover from failed rotates (or anything
386 * else that's left the journals as NULL).
387 *
388 * Fixes https://github.com/systemd/systemd/issues/3968 */
389 (void) system_journal_open(s, false);
390
d025f1e4
ZJS
391 /* We split up user logs only on /var, not on /run. If the
392 * runtime file is open, we write to it exclusively, in order
393 * to guarantee proper order as soon as we flush /run to
394 * /var and close the runtime file. */
395
396 if (s->runtime_journal)
397 return s->runtime_journal;
398
2fce06b0 399 if (uid_for_system_journal(uid))
d025f1e4
ZJS
400 return s->system_journal;
401
402 r = sd_id128_get_machine(&machine);
403 if (r < 0)
404 return s->system_journal;
405
4a0b58c4 406 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
407 if (f)
408 return f;
409
de0671ee
ZJS
410 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
411 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
412 return s->system_journal;
413
43cf8388 414 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 415 /* Too many open? Then let's close one */
43cf8388 416 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 417 assert(f);
69a3a6fd 418 (void) journal_file_close(f);
d025f1e4
ZJS
419 }
420
266a4700 421 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
422 if (r < 0)
423 return s->system_journal;
424
5c3bde3f 425 server_add_acls(f, uid);
d025f1e4 426
4a0b58c4 427 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 428 if (r < 0) {
69a3a6fd 429 (void) journal_file_close(f);
d025f1e4
ZJS
430 return s->system_journal;
431 }
432
433 return f;
434}
435
ea69bd41
LP
436static int do_rotate(
437 Server *s,
438 JournalFile **f,
439 const char* name,
440 bool seal,
441 uint32_t uid) {
442
fc55baee
ZJS
443 int r;
444 assert(s);
445
446 if (!*f)
447 return -EINVAL;
448
1b7cf0e5 449 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
bb6b922f 450 if (r < 0) {
fc55baee 451 if (*f)
bb6b922f 452 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 453 else
bb6b922f
YW
454 return log_error_errno(r, "Failed to create new %s journal: %m", name);
455 }
456
457 server_add_acls(*f, uid);
2678031a 458
fc55baee
ZJS
459 return r;
460}
461
f760d8a8
LP
462static void server_process_deferred_closes(Server *s) {
463 JournalFile *f;
464 Iterator i;
465
466 /* Perform any deferred closes which aren't still offlining. */
467 SET_FOREACH(f, s->deferred_closes, i)
468 if (!journal_file_is_offlining(f)) {
469 (void) set_remove(s->deferred_closes, f);
470 (void) journal_file_close(f);
471 }
472}
473
d025f1e4
ZJS
474void server_rotate(Server *s) {
475 JournalFile *f;
476 void *k;
477 Iterator i;
478 int r;
479
480 log_debug("Rotating...");
481
8580d1f7
LP
482 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
483 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 484
43cf8388 485 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 486 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 487 if (r >= 0)
43cf8388 488 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
489 else if (!f)
490 /* Old file has been closed and deallocated */
43cf8388 491 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 492 }
b58c888f 493
f760d8a8 494 server_process_deferred_closes(s);
d025f1e4
ZJS
495}
496
26687bf8
OS
497void server_sync(Server *s) {
498 JournalFile *f;
26687bf8
OS
499 Iterator i;
500 int r;
501
26687bf8 502 if (s->system_journal) {
ac2e41f5 503 r = journal_file_set_offline(s->system_journal, false);
26687bf8 504 if (r < 0)
65089b82 505 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
506 }
507
65c1d46b 508 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 509 r = journal_file_set_offline(f, false);
26687bf8 510 if (r < 0)
65089b82 511 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
512 }
513
f9a810be
LP
514 if (s->sync_event_source) {
515 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
516 if (r < 0)
da927ba9 517 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 518 }
26687bf8
OS
519
520 s->sync_scheduled = false;
521}
522
3a19f215 523static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 524
63c8666b
ZJS
525 int r;
526
8580d1f7 527 assert(s);
266a4700 528 assert(storage);
8580d1f7 529
57f443a6 530 (void) cache_space_refresh(s, storage);
18e758bf
FB
531
532 if (verbose)
533 server_space_usage_message(s, storage);
8580d1f7 534
57f443a6
FB
535 r = journal_directory_vacuum(storage->path, storage->space.limit,
536 storage->metrics.n_max_files, s->max_retention_usec,
537 &s->oldest_file_usec, verbose);
63c8666b 538 if (r < 0 && r != -ENOENT)
266a4700
FB
539 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
540
a0edc477 541 cache_space_invalidate(&storage->space);
63c8666b
ZJS
542}
543
3a19f215 544int server_vacuum(Server *s, bool verbose) {
8580d1f7 545 assert(s);
d025f1e4
ZJS
546
547 log_debug("Vacuuming...");
548
549 s->oldest_file_usec = 0;
550
266a4700 551 if (s->system_journal)
3a19f215 552 do_vacuum(s, &s->system_storage, verbose);
266a4700 553 if (s->runtime_journal)
3a19f215 554 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 555
8580d1f7 556 return 0;
d025f1e4
ZJS
557}
558
0c24bb23
LP
559static void server_cache_machine_id(Server *s) {
560 sd_id128_t id;
561 int r;
562
563 assert(s);
564
565 r = sd_id128_get_machine(&id);
566 if (r < 0)
567 return;
568
569 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
570}
571
572static void server_cache_boot_id(Server *s) {
573 sd_id128_t id;
574 int r;
575
576 assert(s);
577
578 r = sd_id128_get_boot(&id);
579 if (r < 0)
580 return;
581
582 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
583}
584
585static void server_cache_hostname(Server *s) {
586 _cleanup_free_ char *t = NULL;
587 char *x;
588
589 assert(s);
590
591 t = gethostname_malloc();
592 if (!t)
593 return;
594
595 x = strappend("_HOSTNAME=", t);
596 if (!x)
597 return;
598
599 free(s->hostname_field);
600 s->hostname_field = x;
601}
602
8531ae70 603static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 604 switch(r) {
ae739cc1 605
6e1045e5
ZJS
606 case -E2BIG: /* Hit configured limit */
607 case -EFBIG: /* Hit fs limit */
608 case -EDQUOT: /* Quota limit hit */
609 case -ENOSPC: /* Disk full */
d025f1e4 610 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 611 return true;
ae739cc1 612
6e1045e5
ZJS
613 case -EIO: /* I/O error of some kind (mmap) */
614 log_warning("%s: IO error, rotating.", f->path);
615 return true;
ae739cc1 616
6e1045e5 617 case -EHOSTDOWN: /* Other machine */
d025f1e4 618 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 619 return true;
ae739cc1 620
6e1045e5 621 case -EBUSY: /* Unclean shutdown */
d025f1e4 622 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 623 return true;
ae739cc1 624
6e1045e5 625 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 626 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 627 return true;
ae739cc1 628
6e1045e5
ZJS
629 case -EBADMSG: /* Corrupted */
630 case -ENODATA: /* Truncated */
631 case -ESHUTDOWN: /* Already archived */
d025f1e4 632 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1 634
6e1045e5 635 case -EIDRM: /* Journal file has been deleted */
2678031a 636 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 637 return true;
ae739cc1
LP
638
639 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 640 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
641 return true;
642
6e1045e5 643 default:
d025f1e4 644 return false;
6e1045e5 645 }
d025f1e4
ZJS
646}
647
da6053d0 648static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
7c070017 649 bool vacuumed = false, rotate = false;
0f972d66 650 struct dual_timestamp ts;
d025f1e4 651 JournalFile *f;
d025f1e4
ZJS
652 int r;
653
654 assert(s);
655 assert(iovec);
656 assert(n > 0);
657
0f972d66
LP
658 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
659 * the source time, and not even the time the event was originally seen, but instead simply the time we started
660 * processing it, as we want strictly linear ordering in what we write out.) */
661 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
662 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
663
7c070017
LP
664 if (ts.realtime < s->last_realtime_clock) {
665 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
666 * regular operation. However, when it does happen, then we should make sure that we start fresh files
667 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
668 * bisection works correctly. */
d025f1e4 669
7c070017
LP
670 log_debug("Time jumped backwards, rotating.");
671 rotate = true;
672 } else {
673
674 f = find_journal(s, uid);
675 if (!f)
676 return;
677
678 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
679 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
680 rotate = true;
681 }
682 }
d025f1e4 683
7c070017 684 if (rotate) {
d025f1e4 685 server_rotate(s);
3a19f215 686 server_vacuum(s, false);
d025f1e4
ZJS
687 vacuumed = true;
688
689 f = find_journal(s, uid);
690 if (!f)
691 return;
692 }
693
7c070017
LP
694 s->last_realtime_clock = ts.realtime;
695
d180c349 696 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 697 if (r >= 0) {
d07f7b9e 698 server_schedule_sync(s, priority);
d025f1e4 699 return;
26687bf8 700 }
d025f1e4
ZJS
701
702 if (vacuumed || !shall_try_append_again(f, r)) {
da6053d0 703 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
704 return;
705 }
706
707 server_rotate(s);
3a19f215 708 server_vacuum(s, false);
d025f1e4
ZJS
709
710 f = find_journal(s, uid);
711 if (!f)
712 return;
713
714 log_debug("Retrying write.");
d180c349 715 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0 716 if (r < 0)
da6053d0 717 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
8266e1c0 718 else
d07f7b9e 719 server_schedule_sync(s, priority);
d025f1e4
ZJS
720}
721
22e3a02b
LP
/* Helpers that conditionally append one "FIELD=value" string to iovec[] and advance n. The string is
 * allocated with newa() (or strjoina()) — NOTE(review): presumably alloca()-based, i.e. valid until the
 * calling function returns; confirm in alloc-util.h/string-util.h. 'field' must be a string literal
 * WITHOUT a trailing "=", the macros add it themselves.
 *
 * Each macro is wrapped in do { } while (false) so that it expands to exactly one statement and is
 * safe to use in unbraced if/else bodies. */

/* Adds 'value' formatted with 'format', if isset(value) is true. 'type' sizes the buffer. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* Adds the NUL-terminated string 'value', unless it is NULL or empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* Adds the 128bit ID 'value' in string form, unless it is the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* Adds 'value_size' bytes starting at 'value' (NUL-terminated by the macro), if value_size > 0. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if (value_size > 0) {                                   \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + value_size + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

d025f1e4
ZJS
753static void dispatch_message_real(
754 Server *s,
d3070fbd 755 struct iovec *iovec, size_t n, size_t m,
22e3a02b 756 const ClientContext *c,
3b3154df 757 const struct timeval *tv,
d07f7b9e 758 int priority,
22e3a02b
LP
759 pid_t object_pid) {
760
761 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
762 uid_t journal_uid;
763 ClientContext *o;
d025f1e4
ZJS
764
765 assert(s);
766 assert(iovec);
767 assert(n > 0);
d3070fbd
LP
768 assert(n +
769 N_IOVEC_META_FIELDS +
770 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
771 client_context_extra_fields_n_iovec(c) <= m);
19cace37 772
22e3a02b
LP
773 if (c) {
774 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
775 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
776 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 777
22e3a02b
LP
778 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
779 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
780 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
781 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 782
22e3a02b 783 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 784
22e3a02b
LP
785 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
786 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 787
22e3a02b
LP
788 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
789 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
790 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
791 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
792 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
793 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
794 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 795
22e3a02b 796 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d3070fbd
LP
797
798 if (c->extra_fields_n_iovec > 0) {
799 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
800 n += c->extra_fields_n_iovec;
801 }
d025f1e4 802 }
968f3196 803
22e3a02b 804 assert(n <= m);
968f3196 805
22e3a02b 806 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 807
22e3a02b
LP
808 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
809 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
810 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 811
22e3a02b
LP
812 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
813 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
814 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
815 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 816
22e3a02b 817 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 818
22e3a02b
LP
819 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
820 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 821
22e3a02b
LP
822 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
823 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
824 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
825 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
826 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
827 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
828 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 829
22e3a02b 830 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 831 }
22e3a02b 832
968f3196 833 assert(n <= m);
d025f1e4
ZJS
834
835 if (tv) {
398a50cd 836 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 837 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
838 }
839
840 /* Note that strictly speaking storing the boot id here is
841 * redundant since the entry includes this in-line
842 * anyway. However, we need this indexed, too. */
0c24bb23 843 if (!isempty(s->boot_id_field))
e6a7ec4b 844 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 845
0c24bb23 846 if (!isempty(s->machine_id_field))
e6a7ec4b 847 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 848
0c24bb23 849 if (!isempty(s->hostname_field))
e6a7ec4b 850 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
851
852 assert(n <= m);
853
22e3a02b
LP
854 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
855 /* Split up strictly by (non-root) UID */
856 journal_uid = c->uid;
857 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
858 /* Split up by login UIDs. We do this only if the
859 * realuid is not root, in order not to accidentally
860 * leak privileged information to the user that is
861 * logged by a privileged process that is part of an
7517e174 862 * unprivileged session. */
22e3a02b 863 journal_uid = c->owner_uid;
da499392
KS
864 else
865 journal_uid = 0;
759c945a 866
d07f7b9e 867 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
868}
869
13181942 870void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
22e3a02b 871
d3070fbd
LP
872 struct iovec *iovec;
873 size_t n = 0, k, m;
d025f1e4 874 va_list ap;
22e3a02b 875 int r;
d025f1e4
ZJS
876
877 assert(s);
878 assert(format);
879
f643ae71 880 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
d3070fbd
LP
881 iovec = newa(struct iovec, m);
882
4850d39a 883 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
884 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
885 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 886
e6a7ec4b 887 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 888 assert_cc(6 == LOG_INFO);
e6a7ec4b 889 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 890
2b044526 891 if (message_id)
e6a7ec4b 892 iovec[n++] = IOVEC_MAKE_STRING(message_id);
d3070fbd 893 k = n;
8a03c9ef
ZJS
894
895 va_start(ap, format);
d3070fbd 896 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
32917e33 897 /* Error handling below */
8a03c9ef
ZJS
898 va_end(ap);
899
32917e33 900 if (r >= 0)
d3070fbd 901 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
8a03c9ef 902
d3070fbd
LP
903 while (k < n)
904 free(iovec[k++].iov_base);
32917e33
ZJS
905
906 if (r < 0) {
907 /* We failed to format the message. Emit a warning instead. */
908 char buf[LINE_MAX];
909
910 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
911
912 n = 3;
e6a7ec4b
LP
913 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
914 iovec[n++] = IOVEC_MAKE_STRING(buf);
d3070fbd 915 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
32917e33 916 }
d025f1e4
ZJS
917}
918
919void server_dispatch_message(
920 Server *s,
d3070fbd 921 struct iovec *iovec, size_t n, size_t m,
22e3a02b 922 ClientContext *c,
3b3154df 923 const struct timeval *tv,
968f3196
ZJS
924 int priority,
925 pid_t object_pid) {
d025f1e4 926
8580d1f7 927 uint64_t available = 0;
22e3a02b 928 int rl;
d025f1e4
ZJS
929
930 assert(s);
931 assert(iovec || n == 0);
932
933 if (n == 0)
934 return;
935
936 if (LOG_PRI(priority) > s->max_level_store)
937 return;
938
2f5df74a
HHPF
939 /* Stop early in case the information will not be stored
940 * in a journal. */
941 if (s->storage == STORAGE_NONE)
942 return;
943
22e3a02b
LP
944 if (c && c->unit) {
945 (void) determine_space(s, &available, NULL);
d025f1e4 946
90fc172e 947 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
22e3a02b
LP
948 if (rl == 0)
949 return;
d025f1e4 950
22e3a02b
LP
951 /* Write a suppression message if we suppressed something */
952 if (rl > 1)
13181942
LP
953 server_driver_message(s, c->pid,
954 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
955 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
5908ff1c 956 "N_DROPPED=%i", rl - 1,
22e3a02b 957 NULL);
d025f1e4
ZJS
958 }
959
22e3a02b 960 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
961}
962
f78273c8 963int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
964 sd_id128_t machine;
965 sd_journal *j = NULL;
fbb63411
LP
966 char ts[FORMAT_TIMESPAN_MAX];
967 usec_t start;
968 unsigned n = 0;
969 int r;
d025f1e4
ZJS
970
971 assert(s);
972
f78273c8 973 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
974 return 0;
975
976 if (!s->runtime_journal)
977 return 0;
978
f78273c8
LP
979 if (require_flag_file && !flushed_flag_is_set())
980 return 0;
981
8580d1f7 982 (void) system_journal_open(s, true);
d025f1e4
ZJS
983
984 if (!s->system_journal)
985 return 0;
986
987 log_debug("Flushing to /var...");
988
fbb63411
LP
989 start = now(CLOCK_MONOTONIC);
990
d025f1e4 991 r = sd_id128_get_machine(&machine);
00a16861 992 if (r < 0)
d025f1e4 993 return r;
d025f1e4
ZJS
994
995 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
996 if (r < 0)
997 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 998
93b73b06
LP
999 sd_journal_set_data_threshold(j, 0);
1000
d025f1e4
ZJS
1001 SD_JOURNAL_FOREACH(j) {
1002 Object *o = NULL;
1003 JournalFile *f;
1004
1005 f = j->current_file;
1006 assert(f && f->current_offset > 0);
1007
fbb63411
LP
1008 n++;
1009
d025f1e4
ZJS
1010 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1011 if (r < 0) {
da927ba9 1012 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1013 goto finish;
1014 }
1015
5a271b08 1016 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4
ZJS
1017 if (r >= 0)
1018 continue;
1019
1020 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1021 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1022 goto finish;
1023 }
1024
1025 server_rotate(s);
3a19f215 1026 server_vacuum(s, false);
d025f1e4 1027
253f59df
LP
1028 if (!s->system_journal) {
1029 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1030 r = -EIO;
1031 goto finish;
1032 }
1033
d025f1e4 1034 log_debug("Retrying write.");
5a271b08 1035 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4 1036 if (r < 0) {
da927ba9 1037 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1038 goto finish;
1039 }
1040 }
1041
804ae586
LP
1042 r = 0;
1043
d025f1e4 1044finish:
fd790d6f
RM
1045 if (s->system_journal)
1046 journal_file_post_change(s->system_journal);
d025f1e4 1047
804ae586 1048 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1049
1050 if (r >= 0)
c6878637 1051 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1052
763c7aa2 1053 sd_journal_close(j);
d025f1e4 1054
13181942 1055 server_driver_message(s, 0, NULL,
8a03c9ef
ZJS
1056 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1057 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1058 n),
1059 NULL);
fbb63411 1060
d025f1e4
ZJS
1061 return r;
1062}
1063
8531ae70 1064int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1065 Server *s = userdata;
a315ac4e
LP
1066 struct ucred *ucred = NULL;
1067 struct timeval *tv = NULL;
1068 struct cmsghdr *cmsg;
1069 char *label = NULL;
1070 size_t label_len = 0, m;
1071 struct iovec iovec;
1072 ssize_t n;
1073 int *fds = NULL, v = 0;
da6053d0 1074 size_t n_fds = 0;
a315ac4e
LP
1075
1076 union {
1077 struct cmsghdr cmsghdr;
1078
1079 /* We use NAME_MAX space for the SELinux label
1080 * here. The kernel currently enforces no
1081 * limit, but according to suggestions from
1082 * the SELinux people this will change and it
1083 * will probably be identical to NAME_MAX. For
1084 * now we use that, but this should be updated
1085 * one day when the final limit is known. */
1086 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1087 CMSG_SPACE(sizeof(struct timeval)) +
1088 CMSG_SPACE(sizeof(int)) + /* fd */
1089 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1090 } control = {};
1091
1092 union sockaddr_union sa = {};
1093
1094 struct msghdr msghdr = {
1095 .msg_iov = &iovec,
1096 .msg_iovlen = 1,
1097 .msg_control = &control,
1098 .msg_controllen = sizeof(control),
1099 .msg_name = &sa,
1100 .msg_namelen = sizeof(sa),
1101 };
f9a810be 1102
d025f1e4 1103 assert(s);
875c2e22 1104 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1105
1106 if (revents != EPOLLIN) {
1107 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1108 return -EIO;
1109 }
1110
22e3a02b
LP
1111 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1112 * it.) */
a315ac4e 1113 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1114
a315ac4e
LP
1115 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1116 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1117 (size_t) LINE_MAX,
1118 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1119
a315ac4e
LP
1120 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1121 return log_oom();
875c2e22 1122
a315ac4e
LP
1123 iovec.iov_base = s->buffer;
1124 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1125
a315ac4e
LP
1126 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1127 if (n < 0) {
3742095b 1128 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1129 return 0;
875c2e22 1130
a315ac4e
LP
1131 return log_error_errno(errno, "recvmsg() failed: %m");
1132 }
875c2e22 1133
a315ac4e
LP
1134 CMSG_FOREACH(cmsg, &msghdr) {
1135
1136 if (cmsg->cmsg_level == SOL_SOCKET &&
1137 cmsg->cmsg_type == SCM_CREDENTIALS &&
1138 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1139 ucred = (struct ucred*) CMSG_DATA(cmsg);
1140 else if (cmsg->cmsg_level == SOL_SOCKET &&
1141 cmsg->cmsg_type == SCM_SECURITY) {
1142 label = (char*) CMSG_DATA(cmsg);
1143 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1144 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1145 cmsg->cmsg_type == SO_TIMESTAMP &&
1146 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1147 tv = (struct timeval*) CMSG_DATA(cmsg);
1148 else if (cmsg->cmsg_level == SOL_SOCKET &&
1149 cmsg->cmsg_type == SCM_RIGHTS) {
1150 fds = (int*) CMSG_DATA(cmsg);
1151 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1152 }
a315ac4e 1153 }
d025f1e4 1154
a315ac4e
LP
1155 /* And a trailing NUL, just in case */
1156 s->buffer[n] = 0;
1157
1158 if (fd == s->syslog_fd) {
1159 if (n > 0 && n_fds == 0)
bb3ff70a 1160 server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
a315ac4e
LP
1161 else if (n_fds > 0)
1162 log_warning("Got file descriptors via syslog socket. Ignoring.");
1163
1164 } else if (fd == s->native_fd) {
1165 if (n > 0 && n_fds == 0)
1166 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1167 else if (n == 0 && n_fds == 1)
1168 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1169 else if (n_fds > 0)
1170 log_warning("Got too many file descriptors via native socket. Ignoring.");
1171
1172 } else {
1173 assert(fd == s->audit_fd);
1174
1175 if (n > 0 && n_fds == 0)
1176 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1177 else if (n_fds > 0)
1178 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1179 }
a315ac4e
LP
1180
1181 close_many(fds, n_fds);
1182 return 0;
f9a810be 1183}
d025f1e4 1184
f9a810be
LP
1185static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1186 Server *s = userdata;
33d52ab9 1187 int r;
d025f1e4 1188
f9a810be 1189 assert(s);
d025f1e4 1190
94b65516 1191 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1192
f78273c8 1193 (void) server_flush_to_var(s, false);
f9a810be 1194 server_sync(s);
3a19f215 1195 server_vacuum(s, false);
d025f1e4 1196
33d52ab9
LP
1197 r = touch("/run/systemd/journal/flushed");
1198 if (r < 0)
1199 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1200
18e758bf 1201 server_space_usage_message(s, NULL);
f9a810be
LP
1202 return 0;
1203}
d025f1e4 1204
f9a810be
LP
1205static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1206 Server *s = userdata;
33d52ab9 1207 int r;
d025f1e4 1208
f9a810be 1209 assert(s);
d025f1e4 1210
94b65516 1211 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1212 server_rotate(s);
3a19f215
FB
1213 server_vacuum(s, true);
1214
1215 if (s->system_journal)
1216 patch_min_use(&s->system_storage);
1217 if (s->runtime_journal)
1218 patch_min_use(&s->runtime_storage);
d025f1e4 1219
dbd6e31c 1220 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1221 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1222 if (r < 0)
1223 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1224
f9a810be
LP
1225 return 0;
1226}
d025f1e4 1227
f9a810be
LP
1228static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1229 Server *s = userdata;
d025f1e4 1230
f9a810be 1231 assert(s);
d025f1e4 1232
4daf54a8 1233 log_received_signal(LOG_INFO, si);
d025f1e4 1234
6203e07a 1235 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1236 return 0;
1237}
1238
94b65516
LP
1239static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1240 Server *s = userdata;
33d52ab9 1241 int r;
94b65516
LP
1242
1243 assert(s);
1244
1245 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1246
1247 server_sync(s);
1248
1249 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1250 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1251 if (r < 0)
1252 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1253
1254 return 0;
1255}
1256
f9a810be 1257static int setup_signals(Server *s) {
f9a810be 1258 int r;
d025f1e4
ZJS
1259
1260 assert(s);
1261
9bab3b65 1262 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1263
151b9b96 1264 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1265 if (r < 0)
1266 return r;
1267
151b9b96 1268 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1269 if (r < 0)
1270 return r;
d025f1e4 1271
151b9b96 1272 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1273 if (r < 0)
1274 return r;
d025f1e4 1275
b374689c
LP
1276 /* Let's process SIGTERM late, so that we flush all queued
1277 * messages to disk before we exit */
1278 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1279 if (r < 0)
1280 return r;
1281
1282 /* When journald is invoked on the terminal (when debugging),
1283 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1284 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1285 if (r < 0)
1286 return r;
d025f1e4 1287
b374689c
LP
1288 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1289 if (r < 0)
1290 return r;
1291
94b65516
LP
1292 /* SIGRTMIN+1 causes an immediate sync. We process this very
1293 * late, so that everything else queued at this point is
1294 * really written to disk. Clients can watch
1295 * /run/systemd/journal/synced with inotify until its mtime
1296 * changes to see when a sync happened. */
1297 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1298 if (r < 0)
1299 return r;
1300
1301 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1302 if (r < 0)
1303 return r;
1304
d025f1e4
ZJS
1305 return 0;
1306}
1307
5707ecf3
ZJS
1308static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1309 Server *s = data;
74df0fca 1310 int r;
d025f1e4 1311
5707ecf3 1312 assert(s);
d025f1e4 1313
1d84ad94
LP
1314 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1315
5707ecf3 1316 r = value ? parse_boolean(value) : true;
d581d9d9 1317 if (r < 0)
5707ecf3
ZJS
1318 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1319 else
1320 s->forward_to_syslog = r;
1d84ad94
LP
1321
1322 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1323
5707ecf3
ZJS
1324 r = value ? parse_boolean(value) : true;
1325 if (r < 0)
1326 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1327 else
1328 s->forward_to_kmsg = r;
1d84ad94
LP
1329
1330 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1331
5707ecf3
ZJS
1332 r = value ? parse_boolean(value) : true;
1333 if (r < 0)
1334 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1335 else
1336 s->forward_to_console = r;
1d84ad94
LP
1337
1338 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1339
5707ecf3
ZJS
1340 r = value ? parse_boolean(value) : true;
1341 if (r < 0)
1342 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1343 else
1344 s->forward_to_wall = r;
1d84ad94
LP
1345
1346 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1347
1348 if (proc_cmdline_value_missing(key, value))
1349 return 0;
1350
5707ecf3
ZJS
1351 r = log_level_from_string(value);
1352 if (r < 0)
1353 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1354 else
1355 s->max_level_console = r;
1d84ad94
LP
1356
1357 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1358
1359 if (proc_cmdline_value_missing(key, value))
1360 return 0;
1361
5707ecf3
ZJS
1362 r = log_level_from_string(value);
1363 if (r < 0)
1364 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1365 else
1366 s->max_level_store = r;
1d84ad94
LP
1367
1368 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1369
1370 if (proc_cmdline_value_missing(key, value))
1371 return 0;
1372
5707ecf3
ZJS
1373 r = log_level_from_string(value);
1374 if (r < 0)
1375 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1376 else
1377 s->max_level_syslog = r;
1d84ad94
LP
1378
1379 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1380
1381 if (proc_cmdline_value_missing(key, value))
1382 return 0;
1383
5707ecf3
ZJS
1384 r = log_level_from_string(value);
1385 if (r < 0)
1386 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1387 else
1388 s->max_level_kmsg = r;
1d84ad94
LP
1389
1390 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1391
1392 if (proc_cmdline_value_missing(key, value))
1393 return 0;
1394
5707ecf3
ZJS
1395 r = log_level_from_string(value);
1396 if (r < 0)
1397 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1398 else
1399 s->max_level_wall = r;
1d84ad94 1400
5707ecf3
ZJS
1401 } else if (startswith(key, "systemd.journald"))
1402 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1403
804ae586 1404 /* do not warn about state here, since probably systemd already did */
db91ea32 1405 return 0;
d025f1e4
ZJS
1406}
1407
1408static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1409 assert(s);
1410
43688c49 1411 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1412 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1413 "Journal\0",
1414 config_item_perf_lookup, journald_gperf_lookup,
bcde742e 1415 CONFIG_PARSE_WARN, s);
d025f1e4
ZJS
1416}
1417
f9a810be
LP
1418static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1419 Server *s = userdata;
26687bf8
OS
1420
1421 assert(s);
1422
f9a810be 1423 server_sync(s);
26687bf8
OS
1424 return 0;
1425}
1426
d07f7b9e 1427int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1428 int r;
1429
26687bf8
OS
1430 assert(s);
1431
d07f7b9e
LP
1432 if (priority <= LOG_CRIT) {
1433 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1434 server_sync(s);
1435 return 0;
1436 }
1437
26687bf8
OS
1438 if (s->sync_scheduled)
1439 return 0;
1440
f9a810be
LP
1441 if (s->sync_interval_usec > 0) {
1442 usec_t when;
ca267016 1443
6a0f1f6d 1444 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1445 if (r < 0)
1446 return r;
26687bf8 1447
f9a810be
LP
1448 when += s->sync_interval_usec;
1449
1450 if (!s->sync_event_source) {
6a0f1f6d
LP
1451 r = sd_event_add_time(
1452 s->event,
1453 &s->sync_event_source,
1454 CLOCK_MONOTONIC,
1455 when, 0,
1456 server_dispatch_sync, s);
f9a810be
LP
1457 if (r < 0)
1458 return r;
1459
1460 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1461 } else {
1462 r = sd_event_source_set_time(s->sync_event_source, when);
1463 if (r < 0)
1464 return r;
1465
1466 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1467 }
26687bf8 1468 if (r < 0)
f9a810be 1469 return r;
26687bf8 1470
f9a810be
LP
1471 s->sync_scheduled = true;
1472 }
26687bf8
OS
1473
1474 return 0;
1475}
1476
0c24bb23
LP
1477static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1478 Server *s = userdata;
1479
1480 assert(s);
1481
1482 server_cache_hostname(s);
1483 return 0;
1484}
1485
1486static int server_open_hostname(Server *s) {
1487 int r;
1488
1489 assert(s);
1490
db4a47e9
LP
1491 s->hostname_fd = open("/proc/sys/kernel/hostname",
1492 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
4a62c710
MS
1493 if (s->hostname_fd < 0)
1494 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1495
151b9b96 1496 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1497 if (r < 0) {
28def94c
DR
1498 /* kernels prior to 3.2 don't support polling this file. Ignore
1499 * the failure. */
1500 if (r == -EPERM) {
e53fc357 1501 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1502 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1503 return 0;
1504 }
1505
23bbb0de 1506 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1507 }
1508
1509 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1510 if (r < 0)
1511 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1512
1513 return 0;
1514}
1515
e22aa3d3
LP
1516static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1517 Server *s = userdata;
1518 int r;
1519
1520 assert(s);
1521 assert(s->notify_event_source == es);
1522 assert(s->notify_fd == fd);
1523
e22aa3d3 1524 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1525 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1526 * READY=1 event or an stdout stream event. If there's nothing
1527 * to write anymore, turn our event source off. The next time
1528 * there's something to send it will be turned on again. */
e22aa3d3
LP
1529
1530 if (!s->sent_notify_ready) {
1531 static const char p[] =
1532 "READY=1\n"
1533 "STATUS=Processing requests...";
1534 ssize_t l;
1535
1536 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1537 if (l < 0) {
1538 if (errno == EAGAIN)
1539 return 0;
1540
1541 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1542 }
1543
1544 s->sent_notify_ready = true;
1545 log_debug("Sent READY=1 notification.");
1546
119e9655
LP
1547 } else if (s->send_watchdog) {
1548
1549 static const char p[] =
1550 "WATCHDOG=1";
1551
1552 ssize_t l;
1553
1554 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1555 if (l < 0) {
1556 if (errno == EAGAIN)
1557 return 0;
1558
1559 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1560 }
1561
1562 s->send_watchdog = false;
1563 log_debug("Sent WATCHDOG=1 notification.");
1564
e22aa3d3
LP
1565 } else if (s->stdout_streams_notify_queue)
1566 /* Dispatch one stream notification event */
1567 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1568
61233823 1569 /* Leave us enabled if there's still more to do. */
119e9655 1570 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1571 return 0;
1572
1573 /* There was nothing to do anymore, let's turn ourselves off. */
1574 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1575 if (r < 0)
1576 return log_error_errno(r, "Failed to turn off notify event source: %m");
1577
1578 return 0;
1579}
1580
119e9655
LP
1581static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1582 Server *s = userdata;
1583 int r;
1584
1585 assert(s);
1586
1587 s->send_watchdog = true;
1588
1589 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1590 if (r < 0)
1591 log_warning_errno(r, "Failed to turn on notify event source: %m");
1592
1593 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1594 if (r < 0)
1595 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1596
1597 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1598 if (r < 0)
1599 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1600
1601 return 0;
1602}
1603
e22aa3d3 1604static int server_connect_notify(Server *s) {
15a3e96f 1605 union sockaddr_union sa = {};
e22aa3d3 1606 const char *e;
15a3e96f 1607 int r, salen;
e22aa3d3
LP
1608
1609 assert(s);
1610 assert(s->notify_fd < 0);
1611 assert(!s->notify_event_source);
1612
1613 /*
1614 So here's the problem: we'd like to send notification
1615 messages to PID 1, but we cannot do that via sd_notify(),
1616 since that's synchronous, and we might end up blocking on
1617 it. Specifically: given that PID 1 might block on
1618 dbus-daemon during IPC, and dbus-daemon is logging to us,
1619 and might hence block on us, we might end up in a deadlock
ccddd104 1620 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1621 generating a full blocking circle. To avoid this, let's
1622 create a non-blocking socket, and connect it to the
1623 notification socket, and then wait for POLLOUT before we
1624 send anything. This should efficiently avoid any deadlocks,
1625 as we'll never block on PID 1, hence PID 1 can safely block
1626 on dbus-daemon which can safely block on us again.
1627
1628 Don't think that this issue is real? It is, see:
1629 https://github.com/systemd/systemd/issues/1505
1630 */
1631
1632 e = getenv("NOTIFY_SOCKET");
1633 if (!e)
1634 return 0;
1635
15a3e96f
LP
1636 salen = sockaddr_un_set_path(&sa.un, e);
1637 if (salen < 0)
1638 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
e22aa3d3
LP
1639
1640 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1641 if (s->notify_fd < 0)
1642 return log_error_errno(errno, "Failed to create notify socket: %m");
1643
1644 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1645
15a3e96f 1646 r = connect(s->notify_fd, &sa.sa, salen);
e22aa3d3
LP
1647 if (r < 0)
1648 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1649
1650 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1651 if (r < 0)
1652 return log_error_errno(r, "Failed to watch notification socket: %m");
1653
119e9655
LP
1654 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1655 s->send_watchdog = true;
1656
4de2402b 1657 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1658 if (r < 0)
1659 return log_error_errno(r, "Failed to add watchdog time event: %m");
1660 }
1661
e22aa3d3
LP
1662 /* This should fire pretty soon, which we'll use to send the
1663 * READY=1 event. */
1664
1665 return 0;
1666}
1667
d025f1e4 1668int server_init(Server *s) {
13790add 1669 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1670 int n, r, fd;
7d18d348 1671 bool no_sockets;
d025f1e4
ZJS
1672
1673 assert(s);
1674
1675 zero(*s);
e22aa3d3 1676 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1b7cf0e5
AG
1677 s->compress.enabled = true;
1678 s->compress.threshold_bytes = (uint64_t) -1;
d025f1e4 1679 s->seal = true;
b2392ff3 1680 s->read_kmsg = true;
d025f1e4 1681
119e9655
LP
1682 s->watchdog_usec = USEC_INFINITY;
1683
26687bf8
OS
1684 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1685 s->sync_scheduled = false;
1686
d025f1e4
ZJS
1687 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1688 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1689
40b71e89 1690 s->forward_to_wall = true;
d025f1e4 1691
e150e820
MB
1692 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1693
d025f1e4
ZJS
1694 s->max_level_store = LOG_DEBUG;
1695 s->max_level_syslog = LOG_DEBUG;
1696 s->max_level_kmsg = LOG_NOTICE;
1697 s->max_level_console = LOG_INFO;
40b71e89 1698 s->max_level_wall = LOG_EMERG;
d025f1e4 1699
ec20fe5f
LP
1700 s->line_max = DEFAULT_LINE_MAX;
1701
266a4700
FB
1702 journal_reset_metrics(&s->system_storage.metrics);
1703 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1704
1705 server_parse_config_file(s);
1d84ad94
LP
1706
1707 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1708 if (r < 0)
1709 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1710
d288f79f 1711 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1712 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1713 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1714 s->rate_limit_interval = s->rate_limit_burst = 0;
1715 }
d025f1e4 1716
8580d1f7 1717 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1718
43cf8388 1719 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1720 if (!s->user_journals)
1721 return log_oom();
1722
1723 s->mmap = mmap_cache_new();
1724 if (!s->mmap)
1725 return log_oom();
1726
b58c888f
VC
1727 s->deferred_closes = set_new(NULL);
1728 if (!s->deferred_closes)
1729 return log_oom();
1730
f9a810be 1731 r = sd_event_default(&s->event);
23bbb0de
MS
1732 if (r < 0)
1733 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1734
1735 n = sd_listen_fds(true);
23bbb0de
MS
1736 if (n < 0)
1737 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1738
1739 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1740
1741 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1742
1743 if (s->native_fd >= 0) {
1744 log_error("Too many native sockets passed.");
1745 return -EINVAL;
1746 }
1747
1748 s->native_fd = fd;
1749
1750 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1751
1752 if (s->stdout_fd >= 0) {
1753 log_error("Too many stdout sockets passed.");
1754 return -EINVAL;
1755 }
1756
1757 s->stdout_fd = fd;
1758
03ee5c38
LP
1759 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1760 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1761
1762 if (s->syslog_fd >= 0) {
1763 log_error("Too many /dev/log sockets passed.");
1764 return -EINVAL;
1765 }
1766
1767 s->syslog_fd = fd;
1768
875c2e22
LP
1769 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1770
1771 if (s->audit_fd >= 0) {
1772 log_error("Too many audit sockets passed.");
1773 return -EINVAL;
1774 }
1775
1776 s->audit_fd = fd;
1777
4ec3cd73 1778 } else {
4ec3cd73 1779
13790add
LP
1780 if (!fds) {
1781 fds = fdset_new();
1782 if (!fds)
1783 return log_oom();
1784 }
4ec3cd73 1785
13790add
LP
1786 r = fdset_put(fds, fd);
1787 if (r < 0)
1788 return log_oom();
4ec3cd73 1789 }
d025f1e4
ZJS
1790 }
1791
15d91bff
ZJS
1792 /* Try to restore streams, but don't bother if this fails */
1793 (void) server_restore_streams(s, fds);
d025f1e4 1794
13790add
LP
1795 if (fdset_size(fds) > 0) {
1796 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1797 fds = fdset_free(fds);
1798 }
1799
7d18d348
ZJS
1800 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1801
1802 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1803
1804 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1805 r = server_open_stdout_socket(s);
1806 if (r < 0)
1807 return r;
1808
37b7affe 1809 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1810 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1811 if (r < 0)
1812 return r;
1813
37b7affe 1814 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1815 r = server_open_native_socket(s);
d025f1e4
ZJS
1816 if (r < 0)
1817 return r;
1818
b2392ff3 1819 /* /dev/kmsg */
d025f1e4
ZJS
1820 r = server_open_dev_kmsg(s);
1821 if (r < 0)
1822 return r;
1823
7d18d348
ZJS
1824 /* Unless we got *some* sockets and not audit, open audit socket */
1825 if (s->audit_fd >= 0 || no_sockets) {
1826 r = server_open_audit(s);
1827 if (r < 0)
1828 return r;
1829 }
875c2e22 1830
d025f1e4
ZJS
1831 r = server_open_kernel_seqnum(s);
1832 if (r < 0)
1833 return r;
1834
0c24bb23
LP
1835 r = server_open_hostname(s);
1836 if (r < 0)
1837 return r;
1838
f9a810be 1839 r = setup_signals(s);
d025f1e4
ZJS
1840 if (r < 0)
1841 return r;
1842
90fc172e 1843 s->rate_limit = journal_rate_limit_new();
d025f1e4
ZJS
1844 if (!s->rate_limit)
1845 return -ENOMEM;
1846
e9174f29
LP
1847 r = cg_get_root_path(&s->cgroup_root);
1848 if (r < 0)
1849 return r;
1850
0c24bb23
LP
1851 server_cache_hostname(s);
1852 server_cache_boot_id(s);
1853 server_cache_machine_id(s);
1854
266a4700
FB
1855 s->runtime_storage.name = "Runtime journal";
1856 s->system_storage.name = "System journal";
1857
605405c6
ZJS
1858 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1859 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1860 if (!s->runtime_storage.path || !s->system_storage.path)
1861 return -ENOMEM;
1862
e22aa3d3
LP
1863 (void) server_connect_notify(s);
1864
22e3a02b
LP
1865 (void) client_context_acquire_default(s);
1866
804ae586 1867 return system_journal_open(s, false);
d025f1e4
ZJS
1868}
1869
1870void server_maybe_append_tags(Server *s) {
349cc4a5 1871#if HAVE_GCRYPT
d025f1e4
ZJS
1872 JournalFile *f;
1873 Iterator i;
1874 usec_t n;
1875
1876 n = now(CLOCK_REALTIME);
1877
1878 if (s->system_journal)
1879 journal_file_maybe_append_tag(s->system_journal, n);
1880
43cf8388 1881 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1882 journal_file_maybe_append_tag(f, n);
1883#endif
1884}
1885
1886void server_done(Server *s) {
d025f1e4
ZJS
1887 assert(s);
1888
f9168190 1889 set_free_with_destructor(s->deferred_closes, journal_file_close);
b58c888f 1890
d025f1e4
ZJS
1891 while (s->stdout_streams)
1892 stdout_stream_free(s->stdout_streams);
1893
22e3a02b
LP
1894 client_context_flush_all(s);
1895
d025f1e4 1896 if (s->system_journal)
69a3a6fd 1897 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1898
1899 if (s->runtime_journal)
69a3a6fd 1900 (void) journal_file_close(s->runtime_journal);
d025f1e4 1901
f9168190 1902 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
d025f1e4 1903
f9a810be
LP
1904 sd_event_source_unref(s->syslog_event_source);
1905 sd_event_source_unref(s->native_event_source);
1906 sd_event_source_unref(s->stdout_event_source);
1907 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1908 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1909 sd_event_source_unref(s->sync_event_source);
1910 sd_event_source_unref(s->sigusr1_event_source);
1911 sd_event_source_unref(s->sigusr2_event_source);
1912 sd_event_source_unref(s->sigterm_event_source);
1913 sd_event_source_unref(s->sigint_event_source);
94b65516 1914 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1915 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1916 sd_event_source_unref(s->notify_event_source);
119e9655 1917 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1918 sd_event_unref(s->event);
d025f1e4 1919
03e334a1
LP
1920 safe_close(s->syslog_fd);
1921 safe_close(s->native_fd);
1922 safe_close(s->stdout_fd);
1923 safe_close(s->dev_kmsg_fd);
875c2e22 1924 safe_close(s->audit_fd);
03e334a1 1925 safe_close(s->hostname_fd);
e22aa3d3 1926 safe_close(s->notify_fd);
0c24bb23 1927
d025f1e4
ZJS
1928 if (s->rate_limit)
1929 journal_rate_limit_free(s->rate_limit);
1930
1931 if (s->kernel_seqnum)
1932 munmap(s->kernel_seqnum, sizeof(uint64_t));
1933
1934 free(s->buffer);
1935 free(s->tty_path);
e9174f29 1936 free(s->cgroup_root);
99d0966e 1937 free(s->hostname_field);
c6e9e16f
ZJS
1938 free(s->runtime_storage.path);
1939 free(s->system_storage.path);
d025f1e4
ZJS
1940
1941 if (s->mmap)
1942 mmap_cache_unref(s->mmap);
d025f1e4 1943}
8580d1f7
LP
1944
1945static const char* const storage_table[_STORAGE_MAX] = {
1946 [STORAGE_AUTO] = "auto",
1947 [STORAGE_VOLATILE] = "volatile",
1948 [STORAGE_PERSISTENT] = "persistent",
1949 [STORAGE_NONE] = "none"
1950};
1951
1952DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1953DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1954
1955static const char* const split_mode_table[_SPLIT_MAX] = {
1956 [SPLIT_LOGIN] = "login",
1957 [SPLIT_UID] = "uid",
1958 [SPLIT_NONE] = "none",
1959};
1960
1961DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1962DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1963
1964int config_parse_line_max(
1965 const char* unit,
1966 const char *filename,
1967 unsigned line,
1968 const char *section,
1969 unsigned section_line,
1970 const char *lvalue,
1971 int ltype,
1972 const char *rvalue,
1973 void *data,
1974 void *userdata) {
1975
1976 size_t *sz = data;
1977 int r;
1978
1979 assert(filename);
1980 assert(lvalue);
1981 assert(rvalue);
1982 assert(data);
1983
1984 if (isempty(rvalue))
1985 /* Empty assignment means default */
1986 *sz = DEFAULT_LINE_MAX;
1987 else {
1988 uint64_t v;
1989
1990 r = parse_size(rvalue, 1024, &v);
1991 if (r < 0) {
1992 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
1993 return 0;
1994 }
1995
1996 if (v < 79) {
1997 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
1998 * terminal size is 80ch, and it might make sense to break one character before the natural
1999 * line break would occur on that. */
2000 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2001 *sz = 79;
2002 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2003 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2004 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2005 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2006 * fail much earlier anyway. */
2007 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2008 *sz = SSIZE_MAX-1;
2009 } else
2010 *sz = (size_t) v;
2011 }
2012
2013 return 0;
2014}
1b7cf0e5
AG
2015
2016int config_parse_compress(const char* unit,
2017 const char *filename,
2018 unsigned line,
2019 const char *section,
2020 unsigned section_line,
2021 const char *lvalue,
2022 int ltype,
2023 const char *rvalue,
2024 void *data,
2025 void *userdata) {
2026 JournalCompressOptions* compress = data;
2027 int r;
2028
2029 if (streq(rvalue, "1")) {
2030 log_syntax(unit, LOG_WARNING, filename, line, 0,
2031 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2032 compress->enabled = true;
2033 } else if (streq(rvalue, "0")) {
2034 log_syntax(unit, LOG_WARNING, filename, line, 0,
2035 "Compress= ambiguously specified as 0, disabling compression");
2036 compress->enabled = false;
2037 } else if ((r = parse_boolean(rvalue)) >= 0)
2038 compress->enabled = r;
2039 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2040 compress->enabled = true;
2041 else if (isempty(rvalue)) {
2042 compress->enabled = true;
2043 compress->threshold_bytes = (uint64_t) -1;
2044 } else
2045 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2046
2047 return 0;
2048}