]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journal: remove unused args from journal_file_copy_entry()
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
d025f1e4
ZJS
2/***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
d025f1e4
ZJS
6***/
7
349cc4a5 8#if HAVE_SELINUX
24882e06
LP
9#include <selinux/selinux.h>
10#endif
8580d1f7
LP
11#include <sys/ioctl.h>
12#include <sys/mman.h>
13#include <sys/signalfd.h>
14#include <sys/statvfs.h>
07630cea 15#include <linux/sockios.h>
24882e06 16
b4bbcaa9 17#include "libudev.h"
8580d1f7 18#include "sd-daemon.h"
74df0fca
LP
19#include "sd-journal.h"
20#include "sd-messages.h"
8580d1f7
LP
21
22#include "acl-util.h"
b5efdb8a 23#include "alloc-util.h"
430f0182 24#include "audit-util.h"
d025f1e4 25#include "cgroup-util.h"
d025f1e4 26#include "conf-parser.h"
a0956174 27#include "dirent-util.h"
0dec689b 28#include "extract-word.h"
3ffd4af2 29#include "fd-util.h"
33d52ab9 30#include "fileio.h"
f97b34a6 31#include "format-util.h"
f4f15635 32#include "fs-util.h"
8580d1f7 33#include "hashmap.h"
958b66ea 34#include "hostname-util.h"
4b58153d 35#include "id128-util.h"
afc5dbf3 36#include "io-util.h"
8580d1f7
LP
37#include "journal-authenticate.h"
38#include "journal-file.h"
d025f1e4
ZJS
39#include "journal-internal.h"
40#include "journal-vacuum.h"
8580d1f7 41#include "journald-audit.h"
22e3a02b 42#include "journald-context.h"
d025f1e4 43#include "journald-kmsg.h"
d025f1e4 44#include "journald-native.h"
8580d1f7 45#include "journald-rate-limit.h"
3ffd4af2 46#include "journald-server.h"
8580d1f7
LP
47#include "journald-stream.h"
48#include "journald-syslog.h"
4b58153d 49#include "log.h"
07630cea
LP
50#include "missing.h"
51#include "mkdir.h"
6bedfcbb 52#include "parse-util.h"
4e731273 53#include "proc-cmdline.h"
07630cea
LP
54#include "process-util.h"
55#include "rm-rf.h"
56#include "selinux-util.h"
57#include "signal-util.h"
58#include "socket-util.h"
32917e33 59#include "stdio-util.h"
8b43440b 60#include "string-table.h"
07630cea 61#include "string-util.h"
863a5610 62#include "syslog-util.h"
22e3a02b 63#include "user-util.h"
d025f1e4 64
d025f1e4
ZJS
65#define USER_JOURNALS_MAX 1024
66
26687bf8 67#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696 68#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
3de8ff5a 69#define DEFAULT_RATE_LIMIT_BURST 10000
e150e820 70#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 71
8580d1f7 72#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 73
e22aa3d3
LP
74#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
75
7a24f3bf
VC
76/* The period to insert between posting changes for coalescing */
77#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
78
ec20fe5f
LP
79/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
80 * for a bit of additional metadata. */
81#define DEFAULT_LINE_MAX (48*1024)
82
e0ed6db9
FB
83static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
84 _cleanup_closedir_ DIR *d = NULL;
85 struct dirent *de;
86 struct statvfs ss;
e0ed6db9
FB
87
88 assert(ret_used);
89 assert(ret_free);
90
266a4700 91 d = opendir(path);
e0ed6db9
FB
92 if (!d)
93 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 94 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
95
96 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 97 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
98
99 *ret_free = ss.f_bsize * ss.f_bavail;
100 *ret_used = 0;
101 FOREACH_DIRENT_ALL(de, d, break) {
102 struct stat st;
103
104 if (!endswith(de->d_name, ".journal") &&
105 !endswith(de->d_name, ".journal~"))
106 continue;
107
108 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 109 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
110 continue;
111 }
112
113 if (!S_ISREG(st.st_mode))
114 continue;
115
116 *ret_used += (uint64_t) st.st_blocks * 512UL;
117 }
118
119 return 0;
120}
121
a0edc477 122static void cache_space_invalidate(JournalStorageSpace *space) {
67319249 123 zero(*space);
a0edc477
FB
124}
125
57f443a6 126static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 127 JournalStorageSpace *space;
266a4700 128 JournalMetrics *metrics;
23aba343 129 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 130 usec_t ts;
e0ed6db9 131 int r;
d025f1e4 132
8580d1f7 133 assert(s);
266a4700 134
266a4700 135 metrics = &storage->metrics;
23aba343 136 space = &storage->space;
d025f1e4 137
8580d1f7 138 ts = now(CLOCK_MONOTONIC);
d025f1e4 139
3099caf2 140 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
141 return 0;
142
23aba343 143 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
144 if (r < 0)
145 return r;
d025f1e4 146
23aba343
FB
147 space->vfs_used = vfs_used;
148 space->vfs_available = vfs_avail;
149
150 avail = LESS_BY(vfs_avail, metrics->keep_free);
151
23aba343
FB
152 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
153 space->available = LESS_BY(space->limit, vfs_used);
154 space->timestamp = ts;
8580d1f7
LP
155 return 1;
156}
157
3a19f215
FB
158static void patch_min_use(JournalStorage *storage) {
159 assert(storage);
160
161 /* Let's bump the min_use limit to the current usage on disk. We do
162 * this when starting up and first opening the journal files. This way
163 * sudden spikes in disk usage will not cause journald to vacuum files
164 * without bounds. Note that this means that only a restart of journald
165 * will make it reset this value. */
166
167 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
168}
169
3a19f215 170static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 171 JournalStorage *js;
57f443a6 172 int r;
8580d1f7
LP
173
174 assert(s);
175
266a4700 176 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
177
178 r = cache_space_refresh(s, js);
179 if (r >= 0) {
180 if (available)
181 *available = js->space.available;
182 if (limit)
183 *limit = js->space.limit;
184 }
185 return r;
d025f1e4
ZJS
186}
187
cba5629e
FB
188void server_space_usage_message(Server *s, JournalStorage *storage) {
189 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
190 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
191 JournalMetrics *metrics;
cba5629e
FB
192
193 assert(s);
194
195 if (!storage)
196 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
197
57f443a6 198 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
199 return;
200
201 metrics = &storage->metrics;
23aba343 202 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
203 format_bytes(fb2, sizeof(fb2), metrics->max_use);
204 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 205 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
206 format_bytes(fb5, sizeof(fb5), storage->space.limit);
207 format_bytes(fb6, sizeof(fb6), storage->space.available);
208
13181942
LP
209 server_driver_message(s, 0,
210 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
211 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
212 storage->name, storage->path, fb1, fb5, fb6),
213 "JOURNAL_NAME=%s", storage->name,
214 "JOURNAL_PATH=%s", storage->path,
23aba343 215 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
216 "CURRENT_USE_PRETTY=%s", fb1,
217 "MAX_USE=%"PRIu64, metrics->max_use,
218 "MAX_USE_PRETTY=%s", fb2,
219 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
220 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 221 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
222 "DISK_AVAILABLE_PRETTY=%s", fb4,
223 "LIMIT=%"PRIu64, storage->space.limit,
224 "LIMIT_PRETTY=%s", fb5,
225 "AVAILABLE=%"PRIu64, storage->space.available,
226 "AVAILABLE_PRETTY=%s", fb6,
227 NULL);
228}
229
2fce06b0
LP
230static bool uid_for_system_journal(uid_t uid) {
231
232 /* Returns true if the specified UID shall get its data stored in the system journal*/
233
234 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
235}
236
5c3bde3f 237static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 238#if HAVE_ACL
5c3bde3f 239 int r;
d025f1e4 240#endif
d025f1e4
ZJS
241 assert(f);
242
349cc4a5 243#if HAVE_ACL
2fce06b0 244 if (uid_for_system_journal(uid))
d025f1e4
ZJS
245 return;
246
5c3bde3f
ZJS
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
250#endif
251}
252
7a24f3bf
VC
253static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
7a24f3bf
VC
260 JournalFile **ret) {
261 int r;
e167d7fd 262 JournalFile *f;
7a24f3bf
VC
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
1b7cf0e5
AG
269 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
270 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 271 else
1b7cf0e5
AG
272 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
273 metrics, s->mmap, s->deferred_closes, NULL, &f);
274
7a24f3bf
VC
275 if (r < 0)
276 return r;
277
e167d7fd 278 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 279 if (r < 0) {
69a3a6fd 280 (void) journal_file_close(f);
7a24f3bf
VC
281 return r;
282 }
283
e167d7fd 284 *ret = f;
7a24f3bf
VC
285 return r;
286}
287
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        static const char flag_path[] = "/run/systemd/journal/flushed";

        return access(flag_path, F_OK) >= 0;
}
291
105bdb46
VC
292static int system_journal_open(Server *s, bool flush_requested) {
293 const char *fn;
294 int r = 0;
295
296 if (!s->system_journal &&
f78273c8
LP
297 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
298 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
299
300 /* If in auto mode: first try to create the machine
301 * path, but not the prefix.
302 *
303 * If in persistent mode: create /var/log/journal and
304 * the machine path */
305
306 if (s->storage == STORAGE_PERSISTENT)
307 (void) mkdir_p("/var/log/journal/", 0755);
308
266a4700 309 (void) mkdir(s->system_storage.path, 0755);
105bdb46 310
266a4700
FB
311 fn = strjoina(s->system_storage.path, "/system.journal");
312 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
313 if (r >= 0) {
314 server_add_acls(s->system_journal, 0);
57f443a6 315 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 316 patch_min_use(&s->system_storage);
105bdb46 317 } else if (r < 0) {
4c701096 318 if (!IN_SET(r, -ENOENT, -EROFS))
105bdb46
VC
319 log_warning_errno(r, "Failed to open system journal: %m");
320
321 r = 0;
322 }
929eeb54
VC
323
324 /* If the runtime journal is open, and we're post-flush, we're
325 * recovering from a failed system journal rotate (ENOSPC)
326 * for which the runtime journal was reopened.
327 *
328 * Perform an implicit flush to var, leaving the runtime
329 * journal closed, now that the system journal is back.
330 */
f78273c8
LP
331 if (!flush_requested)
332 (void) server_flush_to_var(s, true);
105bdb46
VC
333 }
334
335 if (!s->runtime_journal &&
336 (s->storage != STORAGE_NONE)) {
337
266a4700 338 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
339
340 if (s->system_journal) {
341
342 /* Try to open the runtime journal, but only
343 * if it already exists, so that we can flush
344 * it into the system journal */
345
266a4700 346 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
347 if (r < 0) {
348 if (r != -ENOENT)
349 log_warning_errno(r, "Failed to open runtime journal: %m");
350
351 r = 0;
352 }
353
354 } else {
355
356 /* OK, we really need the runtime journal, so create
357 * it if necessary. */
358
359 (void) mkdir("/run/log", 0755);
360 (void) mkdir("/run/log/journal", 0755);
361 (void) mkdir_parents(fn, 0750);
362
266a4700 363 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
364 if (r < 0)
365 return log_error_errno(r, "Failed to open runtime journal: %m");
366 }
367
368 if (s->runtime_journal) {
369 server_add_acls(s->runtime_journal, 0);
57f443a6 370 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 371 patch_min_use(&s->runtime_storage);
105bdb46
VC
372 }
373 }
374
375 return r;
376}
377
d025f1e4 378static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 379 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
380 int r;
381 JournalFile *f;
382 sd_id128_t machine;
383
384 assert(s);
385
105bdb46
VC
386 /* A rotate that fails to create the new journal (ENOSPC) leaves the
387 * rotated journal as NULL. Unless we revisit opening, even after
388 * space is made available we'll continue to return NULL indefinitely.
389 *
390 * system_journal_open() is a noop if the journals are already open, so
391 * we can just call it here to recover from failed rotates (or anything
392 * else that's left the journals as NULL).
393 *
394 * Fixes https://github.com/systemd/systemd/issues/3968 */
395 (void) system_journal_open(s, false);
396
d025f1e4
ZJS
397 /* We split up user logs only on /var, not on /run. If the
398 * runtime file is open, we write to it exclusively, in order
399 * to guarantee proper order as soon as we flush /run to
400 * /var and close the runtime file. */
401
402 if (s->runtime_journal)
403 return s->runtime_journal;
404
2fce06b0 405 if (uid_for_system_journal(uid))
d025f1e4
ZJS
406 return s->system_journal;
407
408 r = sd_id128_get_machine(&machine);
409 if (r < 0)
410 return s->system_journal;
411
4a0b58c4 412 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
413 if (f)
414 return f;
415
de0671ee
ZJS
416 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
417 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
418 return s->system_journal;
419
43cf8388 420 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 421 /* Too many open? Then let's close one */
43cf8388 422 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 423 assert(f);
69a3a6fd 424 (void) journal_file_close(f);
d025f1e4
ZJS
425 }
426
266a4700 427 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
428 if (r < 0)
429 return s->system_journal;
430
5c3bde3f 431 server_add_acls(f, uid);
d025f1e4 432
4a0b58c4 433 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 434 if (r < 0) {
69a3a6fd 435 (void) journal_file_close(f);
d025f1e4
ZJS
436 return s->system_journal;
437 }
438
439 return f;
440}
441
ea69bd41
LP
442static int do_rotate(
443 Server *s,
444 JournalFile **f,
445 const char* name,
446 bool seal,
447 uint32_t uid) {
448
fc55baee
ZJS
449 int r;
450 assert(s);
451
452 if (!*f)
453 return -EINVAL;
454
1b7cf0e5 455 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
bb6b922f 456 if (r < 0) {
fc55baee 457 if (*f)
bb6b922f 458 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 459 else
bb6b922f
YW
460 return log_error_errno(r, "Failed to create new %s journal: %m", name);
461 }
462
463 server_add_acls(*f, uid);
2678031a 464
fc55baee
ZJS
465 return r;
466}
467
d025f1e4
ZJS
468void server_rotate(Server *s) {
469 JournalFile *f;
470 void *k;
471 Iterator i;
472 int r;
473
474 log_debug("Rotating...");
475
8580d1f7
LP
476 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
477 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 478
43cf8388 479 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 480 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 481 if (r >= 0)
43cf8388 482 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
483 else if (!f)
484 /* Old file has been closed and deallocated */
43cf8388 485 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 486 }
b58c888f
VC
487
488 /* Perform any deferred closes which aren't still offlining. */
489 SET_FOREACH(f, s->deferred_closes, i)
490 if (!journal_file_is_offlining(f)) {
491 (void) set_remove(s->deferred_closes, f);
492 (void) journal_file_close(f);
493 }
d025f1e4
ZJS
494}
495
26687bf8
OS
496void server_sync(Server *s) {
497 JournalFile *f;
26687bf8
OS
498 Iterator i;
499 int r;
500
26687bf8 501 if (s->system_journal) {
ac2e41f5 502 r = journal_file_set_offline(s->system_journal, false);
26687bf8 503 if (r < 0)
65089b82 504 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
505 }
506
65c1d46b 507 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 508 r = journal_file_set_offline(f, false);
26687bf8 509 if (r < 0)
65089b82 510 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
511 }
512
f9a810be
LP
513 if (s->sync_event_source) {
514 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
515 if (r < 0)
da927ba9 516 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 517 }
26687bf8
OS
518
519 s->sync_scheduled = false;
520}
521
3a19f215 522static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 523
63c8666b
ZJS
524 int r;
525
8580d1f7 526 assert(s);
266a4700 527 assert(storage);
8580d1f7 528
57f443a6 529 (void) cache_space_refresh(s, storage);
18e758bf
FB
530
531 if (verbose)
532 server_space_usage_message(s, storage);
8580d1f7 533
57f443a6
FB
534 r = journal_directory_vacuum(storage->path, storage->space.limit,
535 storage->metrics.n_max_files, s->max_retention_usec,
536 &s->oldest_file_usec, verbose);
63c8666b 537 if (r < 0 && r != -ENOENT)
266a4700
FB
538 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
539
a0edc477 540 cache_space_invalidate(&storage->space);
63c8666b
ZJS
541}
542
3a19f215 543int server_vacuum(Server *s, bool verbose) {
8580d1f7 544 assert(s);
d025f1e4
ZJS
545
546 log_debug("Vacuuming...");
547
548 s->oldest_file_usec = 0;
549
266a4700 550 if (s->system_journal)
3a19f215 551 do_vacuum(s, &s->system_storage, verbose);
266a4700 552 if (s->runtime_journal)
3a19f215 553 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 554
8580d1f7 555 return 0;
d025f1e4
ZJS
556}
557
0c24bb23
LP
558static void server_cache_machine_id(Server *s) {
559 sd_id128_t id;
560 int r;
561
562 assert(s);
563
564 r = sd_id128_get_machine(&id);
565 if (r < 0)
566 return;
567
568 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
569}
570
571static void server_cache_boot_id(Server *s) {
572 sd_id128_t id;
573 int r;
574
575 assert(s);
576
577 r = sd_id128_get_boot(&id);
578 if (r < 0)
579 return;
580
581 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
582}
583
584static void server_cache_hostname(Server *s) {
585 _cleanup_free_ char *t = NULL;
586 char *x;
587
588 assert(s);
589
590 t = gethostname_malloc();
591 if (!t)
592 return;
593
594 x = strappend("_HOSTNAME=", t);
595 if (!x)
596 return;
597
598 free(s->hostname_field);
599 s->hostname_field = x;
600}
601
8531ae70 602static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 603 switch(r) {
ae739cc1 604
6e1045e5
ZJS
605 case -E2BIG: /* Hit configured limit */
606 case -EFBIG: /* Hit fs limit */
607 case -EDQUOT: /* Quota limit hit */
608 case -ENOSPC: /* Disk full */
d025f1e4 609 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 610 return true;
ae739cc1 611
6e1045e5
ZJS
612 case -EIO: /* I/O error of some kind (mmap) */
613 log_warning("%s: IO error, rotating.", f->path);
614 return true;
ae739cc1 615
6e1045e5 616 case -EHOSTDOWN: /* Other machine */
d025f1e4 617 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 618 return true;
ae739cc1 619
6e1045e5 620 case -EBUSY: /* Unclean shutdown */
d025f1e4 621 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 622 return true;
ae739cc1 623
6e1045e5 624 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 625 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 626 return true;
ae739cc1 627
6e1045e5
ZJS
628 case -EBADMSG: /* Corrupted */
629 case -ENODATA: /* Truncated */
630 case -ESHUTDOWN: /* Already archived */
d025f1e4 631 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 632 return true;
ae739cc1 633
6e1045e5 634 case -EIDRM: /* Journal file has been deleted */
2678031a 635 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 636 return true;
ae739cc1
LP
637
638 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 639 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
640 return true;
641
6e1045e5 642 default:
d025f1e4 643 return false;
6e1045e5 644 }
d025f1e4
ZJS
645}
646
da6053d0 647static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
7c070017 648 bool vacuumed = false, rotate = false;
0f972d66 649 struct dual_timestamp ts;
d025f1e4 650 JournalFile *f;
d025f1e4
ZJS
651 int r;
652
653 assert(s);
654 assert(iovec);
655 assert(n > 0);
656
0f972d66
LP
657 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
658 * the source time, and not even the time the event was originally seen, but instead simply the time we started
659 * processing it, as we want strictly linear ordering in what we write out.) */
660 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
661 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
662
7c070017
LP
663 if (ts.realtime < s->last_realtime_clock) {
664 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
665 * regular operation. However, when it does happen, then we should make sure that we start fresh files
666 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
667 * bisection works correctly. */
d025f1e4 668
7c070017
LP
669 log_debug("Time jumped backwards, rotating.");
670 rotate = true;
671 } else {
672
673 f = find_journal(s, uid);
674 if (!f)
675 return;
676
677 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
678 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
679 rotate = true;
680 }
681 }
d025f1e4 682
7c070017 683 if (rotate) {
d025f1e4 684 server_rotate(s);
3a19f215 685 server_vacuum(s, false);
d025f1e4
ZJS
686 vacuumed = true;
687
688 f = find_journal(s, uid);
689 if (!f)
690 return;
691 }
692
7c070017
LP
693 s->last_realtime_clock = ts.realtime;
694
0f972d66 695 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 696 if (r >= 0) {
d07f7b9e 697 server_schedule_sync(s, priority);
d025f1e4 698 return;
26687bf8 699 }
d025f1e4
ZJS
700
701 if (vacuumed || !shall_try_append_again(f, r)) {
da6053d0 702 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
703 return;
704 }
705
706 server_rotate(s);
3a19f215 707 server_vacuum(s, false);
d025f1e4
ZJS
708
709 f = find_journal(s, uid);
710 if (!f)
711 return;
712
713 log_debug("Retrying write.");
0f972d66 714 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0 715 if (r < 0)
da6053d0 716 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
8266e1c0 717 else
d07f7b9e 718 server_schedule_sync(s, priority);
d025f1e4
ZJS
719}
720
22e3a02b
LP
/* Helpers that append one "FIELD=value" entry to an iovec array and advance the fill counter.
 *
 * Common to all four:
 *   - 'iovec' is the array, 'n' an lvalue holding the index of the next free slot (it is incremented)
 *   - 'field' must be a string literal naming the field WITHOUT a trailing "="; the macros add it
 *   - the string is built in memory obtained from newa()/strjoina(), so the entry must not be used
 *     after the calling function has returned
 *   - nothing is appended if the value is not set (see each macro for what that means)
 *
 * Each macro expands to a single statement and has to be followed by a semicolon. */

/* Appends 'value' formatted with the printf-style 'format' if isset(value) holds. 'type' is the integer
 * type of the value and is only used to size the buffer. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)                   \
        do {                                                                                    \
                if (isset(value)) {                                                             \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1);          \
                        sprintf(k, field "=" format, value);                                    \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Appends the string 'value' unless it is NULL or empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                                         \
        do {                                                                                    \
                if (!isempty(value)) {                                                          \
                        char *k;                                                                \
                        k = strjoina(field "=", value);                                         \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Appends the 128 bit ID 'value' in its string form unless it is the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                                          \
        do {                                                                                    \
                if (!sd_id128_is_null(value)) {                                                 \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX);                \
                        sd_id128_to_string(value, stpcpy(k, field "="));                        \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Appends the first 'value_size' bytes of 'value' (which need not be NUL terminated) if 'value_size'
 * is greater than zero. A terminating NUL byte is added to the copy. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)                              \
        do {                                                                                    \
                if ((value_size) > 0) {                                                         \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1);                   \
                        *((char*) mempcpy(stpcpy(k, field "="), (value), (value_size))) = 0;    \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)
4b58153d 751
d025f1e4
ZJS
752static void dispatch_message_real(
753 Server *s,
d3070fbd 754 struct iovec *iovec, size_t n, size_t m,
22e3a02b 755 const ClientContext *c,
3b3154df 756 const struct timeval *tv,
d07f7b9e 757 int priority,
22e3a02b
LP
758 pid_t object_pid) {
759
760 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
761 uid_t journal_uid;
762 ClientContext *o;
d025f1e4
ZJS
763
764 assert(s);
765 assert(iovec);
766 assert(n > 0);
d3070fbd
LP
767 assert(n +
768 N_IOVEC_META_FIELDS +
769 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
770 client_context_extra_fields_n_iovec(c) <= m);
19cace37 771
22e3a02b
LP
772 if (c) {
773 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
774 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
775 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 776
22e3a02b
LP
777 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
778 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
779 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
780 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 781
22e3a02b 782 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 783
22e3a02b
LP
784 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
785 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 786
22e3a02b
LP
787 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
788 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
789 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
790 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
791 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
792 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
793 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 794
22e3a02b 795 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d3070fbd
LP
796
797 if (c->extra_fields_n_iovec > 0) {
798 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
799 n += c->extra_fields_n_iovec;
800 }
d025f1e4 801 }
968f3196 802
22e3a02b 803 assert(n <= m);
968f3196 804
22e3a02b 805 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 806
22e3a02b
LP
807 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
808 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
809 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 810
22e3a02b
LP
811 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
812 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
813 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
814 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 815
22e3a02b 816 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 817
22e3a02b
LP
818 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
819 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 820
22e3a02b
LP
821 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
822 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
823 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
824 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
825 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
826 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
827 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 828
22e3a02b 829 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 830 }
22e3a02b 831
968f3196 832 assert(n <= m);
d025f1e4
ZJS
833
834 if (tv) {
398a50cd 835 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 836 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
837 }
838
839 /* Note that strictly speaking storing the boot id here is
840 * redundant since the entry includes this in-line
841 * anyway. However, we need this indexed, too. */
0c24bb23 842 if (!isempty(s->boot_id_field))
e6a7ec4b 843 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 844
0c24bb23 845 if (!isempty(s->machine_id_field))
e6a7ec4b 846 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 847
0c24bb23 848 if (!isempty(s->hostname_field))
e6a7ec4b 849 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
850
851 assert(n <= m);
852
22e3a02b
LP
853 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
854 /* Split up strictly by (non-root) UID */
855 journal_uid = c->uid;
856 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
857 /* Split up by login UIDs. We do this only if the
858 * realuid is not root, in order not to accidentally
859 * leak privileged information to the user that is
860 * logged by a privileged process that is part of an
7517e174 861 * unprivileged session. */
22e3a02b 862 journal_uid = c->owner_uid;
da499392
KS
863 else
864 journal_uid = 0;
759c945a 865
d07f7b9e 866 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
867}
868
13181942 869void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
22e3a02b 870
d3070fbd
LP
871 struct iovec *iovec;
872 size_t n = 0, k, m;
d025f1e4 873 va_list ap;
22e3a02b 874 int r;
d025f1e4
ZJS
875
876 assert(s);
877 assert(format);
878
f643ae71 879 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
d3070fbd
LP
880 iovec = newa(struct iovec, m);
881
4850d39a 882 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
883 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
884 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 885
e6a7ec4b 886 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 887 assert_cc(6 == LOG_INFO);
e6a7ec4b 888 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 889
2b044526 890 if (message_id)
e6a7ec4b 891 iovec[n++] = IOVEC_MAKE_STRING(message_id);
d3070fbd 892 k = n;
8a03c9ef
ZJS
893
894 va_start(ap, format);
d3070fbd 895 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
32917e33 896 /* Error handling below */
8a03c9ef
ZJS
897 va_end(ap);
898
32917e33 899 if (r >= 0)
d3070fbd 900 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
8a03c9ef 901
d3070fbd
LP
902 while (k < n)
903 free(iovec[k++].iov_base);
32917e33
ZJS
904
905 if (r < 0) {
906 /* We failed to format the message. Emit a warning instead. */
907 char buf[LINE_MAX];
908
909 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
910
911 n = 3;
e6a7ec4b
LP
912 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
913 iovec[n++] = IOVEC_MAKE_STRING(buf);
d3070fbd 914 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
32917e33 915 }
d025f1e4
ZJS
916}
917
918void server_dispatch_message(
919 Server *s,
d3070fbd 920 struct iovec *iovec, size_t n, size_t m,
22e3a02b 921 ClientContext *c,
3b3154df 922 const struct timeval *tv,
968f3196
ZJS
923 int priority,
924 pid_t object_pid) {
d025f1e4 925
8580d1f7 926 uint64_t available = 0;
22e3a02b 927 int rl;
d025f1e4
ZJS
928
929 assert(s);
930 assert(iovec || n == 0);
931
932 if (n == 0)
933 return;
934
935 if (LOG_PRI(priority) > s->max_level_store)
936 return;
937
2f5df74a
HHPF
938 /* Stop early in case the information will not be stored
939 * in a journal. */
940 if (s->storage == STORAGE_NONE)
941 return;
942
22e3a02b
LP
943 if (c && c->unit) {
944 (void) determine_space(s, &available, NULL);
d025f1e4 945
22e3a02b
LP
946 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
947 if (rl == 0)
948 return;
d025f1e4 949
22e3a02b
LP
950 /* Write a suppression message if we suppressed something */
951 if (rl > 1)
13181942
LP
952 server_driver_message(s, c->pid,
953 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
954 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
5908ff1c 955 "N_DROPPED=%i", rl - 1,
22e3a02b 956 NULL);
d025f1e4
ZJS
957 }
958
22e3a02b 959 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
960}
961
f78273c8 962int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
963 sd_id128_t machine;
964 sd_journal *j = NULL;
fbb63411
LP
965 char ts[FORMAT_TIMESPAN_MAX];
966 usec_t start;
967 unsigned n = 0;
968 int r;
d025f1e4
ZJS
969
970 assert(s);
971
f78273c8 972 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
973 return 0;
974
975 if (!s->runtime_journal)
976 return 0;
977
f78273c8
LP
978 if (require_flag_file && !flushed_flag_is_set())
979 return 0;
980
8580d1f7 981 (void) system_journal_open(s, true);
d025f1e4
ZJS
982
983 if (!s->system_journal)
984 return 0;
985
986 log_debug("Flushing to /var...");
987
fbb63411
LP
988 start = now(CLOCK_MONOTONIC);
989
d025f1e4 990 r = sd_id128_get_machine(&machine);
00a16861 991 if (r < 0)
d025f1e4 992 return r;
d025f1e4
ZJS
993
994 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
995 if (r < 0)
996 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 997
93b73b06
LP
998 sd_journal_set_data_threshold(j, 0);
999
d025f1e4
ZJS
1000 SD_JOURNAL_FOREACH(j) {
1001 Object *o = NULL;
1002 JournalFile *f;
1003
1004 f = j->current_file;
1005 assert(f && f->current_offset > 0);
1006
fbb63411
LP
1007 n++;
1008
d025f1e4
ZJS
1009 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1010 if (r < 0) {
da927ba9 1011 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1012 goto finish;
1013 }
1014
5a271b08 1015 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4
ZJS
1016 if (r >= 0)
1017 continue;
1018
1019 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1020 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1021 goto finish;
1022 }
1023
1024 server_rotate(s);
3a19f215 1025 server_vacuum(s, false);
d025f1e4 1026
253f59df
LP
1027 if (!s->system_journal) {
1028 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1029 r = -EIO;
1030 goto finish;
1031 }
1032
d025f1e4 1033 log_debug("Retrying write.");
5a271b08 1034 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4 1035 if (r < 0) {
da927ba9 1036 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1037 goto finish;
1038 }
1039 }
1040
804ae586
LP
1041 r = 0;
1042
d025f1e4
ZJS
1043finish:
1044 journal_file_post_change(s->system_journal);
1045
804ae586 1046 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1047
1048 if (r >= 0)
c6878637 1049 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1050
763c7aa2 1051 sd_journal_close(j);
d025f1e4 1052
13181942 1053 server_driver_message(s, 0, NULL,
8a03c9ef
ZJS
1054 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1055 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1056 n),
1057 NULL);
fbb63411 1058
d025f1e4
ZJS
1059 return r;
1060}
1061
8531ae70 1062int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1063 Server *s = userdata;
a315ac4e
LP
1064 struct ucred *ucred = NULL;
1065 struct timeval *tv = NULL;
1066 struct cmsghdr *cmsg;
1067 char *label = NULL;
1068 size_t label_len = 0, m;
1069 struct iovec iovec;
1070 ssize_t n;
1071 int *fds = NULL, v = 0;
da6053d0 1072 size_t n_fds = 0;
a315ac4e
LP
1073
1074 union {
1075 struct cmsghdr cmsghdr;
1076
1077 /* We use NAME_MAX space for the SELinux label
1078 * here. The kernel currently enforces no
1079 * limit, but according to suggestions from
1080 * the SELinux people this will change and it
1081 * will probably be identical to NAME_MAX. For
1082 * now we use that, but this should be updated
1083 * one day when the final limit is known. */
1084 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1085 CMSG_SPACE(sizeof(struct timeval)) +
1086 CMSG_SPACE(sizeof(int)) + /* fd */
1087 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1088 } control = {};
1089
1090 union sockaddr_union sa = {};
1091
1092 struct msghdr msghdr = {
1093 .msg_iov = &iovec,
1094 .msg_iovlen = 1,
1095 .msg_control = &control,
1096 .msg_controllen = sizeof(control),
1097 .msg_name = &sa,
1098 .msg_namelen = sizeof(sa),
1099 };
f9a810be 1100
d025f1e4 1101 assert(s);
875c2e22 1102 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1103
1104 if (revents != EPOLLIN) {
1105 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1106 return -EIO;
1107 }
1108
22e3a02b
LP
1109 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1110 * it.) */
a315ac4e 1111 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1112
a315ac4e
LP
1113 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1114 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1115 (size_t) LINE_MAX,
1116 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1117
a315ac4e
LP
1118 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1119 return log_oom();
875c2e22 1120
a315ac4e
LP
1121 iovec.iov_base = s->buffer;
1122 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1123
a315ac4e
LP
1124 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1125 if (n < 0) {
3742095b 1126 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1127 return 0;
875c2e22 1128
a315ac4e
LP
1129 return log_error_errno(errno, "recvmsg() failed: %m");
1130 }
875c2e22 1131
a315ac4e
LP
1132 CMSG_FOREACH(cmsg, &msghdr) {
1133
1134 if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SCM_CREDENTIALS &&
1136 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1137 ucred = (struct ucred*) CMSG_DATA(cmsg);
1138 else if (cmsg->cmsg_level == SOL_SOCKET &&
1139 cmsg->cmsg_type == SCM_SECURITY) {
1140 label = (char*) CMSG_DATA(cmsg);
1141 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1142 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1143 cmsg->cmsg_type == SO_TIMESTAMP &&
1144 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1145 tv = (struct timeval*) CMSG_DATA(cmsg);
1146 else if (cmsg->cmsg_level == SOL_SOCKET &&
1147 cmsg->cmsg_type == SCM_RIGHTS) {
1148 fds = (int*) CMSG_DATA(cmsg);
1149 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1150 }
a315ac4e 1151 }
d025f1e4 1152
a315ac4e
LP
1153 /* And a trailing NUL, just in case */
1154 s->buffer[n] = 0;
1155
1156 if (fd == s->syslog_fd) {
1157 if (n > 0 && n_fds == 0)
1158 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1159 else if (n_fds > 0)
1160 log_warning("Got file descriptors via syslog socket. Ignoring.");
1161
1162 } else if (fd == s->native_fd) {
1163 if (n > 0 && n_fds == 0)
1164 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1165 else if (n == 0 && n_fds == 1)
1166 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1167 else if (n_fds > 0)
1168 log_warning("Got too many file descriptors via native socket. Ignoring.");
1169
1170 } else {
1171 assert(fd == s->audit_fd);
1172
1173 if (n > 0 && n_fds == 0)
1174 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1175 else if (n_fds > 0)
1176 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1177 }
a315ac4e
LP
1178
1179 close_many(fds, n_fds);
1180 return 0;
f9a810be 1181}
d025f1e4 1182
f9a810be
LP
1183static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1184 Server *s = userdata;
33d52ab9 1185 int r;
d025f1e4 1186
f9a810be 1187 assert(s);
d025f1e4 1188
94b65516 1189 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1190
f78273c8 1191 (void) server_flush_to_var(s, false);
f9a810be 1192 server_sync(s);
3a19f215 1193 server_vacuum(s, false);
d025f1e4 1194
33d52ab9
LP
1195 r = touch("/run/systemd/journal/flushed");
1196 if (r < 0)
1197 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1198
18e758bf 1199 server_space_usage_message(s, NULL);
f9a810be
LP
1200 return 0;
1201}
d025f1e4 1202
f9a810be
LP
1203static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1204 Server *s = userdata;
33d52ab9 1205 int r;
d025f1e4 1206
f9a810be 1207 assert(s);
d025f1e4 1208
94b65516 1209 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1210 server_rotate(s);
3a19f215
FB
1211 server_vacuum(s, true);
1212
1213 if (s->system_journal)
1214 patch_min_use(&s->system_storage);
1215 if (s->runtime_journal)
1216 patch_min_use(&s->runtime_storage);
d025f1e4 1217
dbd6e31c 1218 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1219 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1220 if (r < 0)
1221 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1222
f9a810be
LP
1223 return 0;
1224}
d025f1e4 1225
f9a810be
LP
1226static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1227 Server *s = userdata;
d025f1e4 1228
f9a810be 1229 assert(s);
d025f1e4 1230
4daf54a8 1231 log_received_signal(LOG_INFO, si);
d025f1e4 1232
6203e07a 1233 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1234 return 0;
1235}
1236
94b65516
LP
1237static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238 Server *s = userdata;
33d52ab9 1239 int r;
94b65516
LP
1240
1241 assert(s);
1242
1243 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1244
1245 server_sync(s);
1246
1247 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1248 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1249 if (r < 0)
1250 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1251
1252 return 0;
1253}
1254
f9a810be 1255static int setup_signals(Server *s) {
f9a810be 1256 int r;
d025f1e4
ZJS
1257
1258 assert(s);
1259
9bab3b65 1260 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1261
151b9b96 1262 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1263 if (r < 0)
1264 return r;
1265
151b9b96 1266 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1267 if (r < 0)
1268 return r;
d025f1e4 1269
151b9b96 1270 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1271 if (r < 0)
1272 return r;
d025f1e4 1273
b374689c
LP
1274 /* Let's process SIGTERM late, so that we flush all queued
1275 * messages to disk before we exit */
1276 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1277 if (r < 0)
1278 return r;
1279
1280 /* When journald is invoked on the terminal (when debugging),
1281 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1282 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1283 if (r < 0)
1284 return r;
d025f1e4 1285
b374689c
LP
1286 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1287 if (r < 0)
1288 return r;
1289
94b65516
LP
1290 /* SIGRTMIN+1 causes an immediate sync. We process this very
1291 * late, so that everything else queued at this point is
1292 * really written to disk. Clients can watch
1293 * /run/systemd/journal/synced with inotify until its mtime
1294 * changes to see when a sync happened. */
1295 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1296 if (r < 0)
1297 return r;
1298
1299 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1300 if (r < 0)
1301 return r;
1302
d025f1e4
ZJS
1303 return 0;
1304}
1305
5707ecf3
ZJS
1306static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1307 Server *s = data;
74df0fca 1308 int r;
d025f1e4 1309
5707ecf3 1310 assert(s);
d025f1e4 1311
1d84ad94
LP
1312 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1313
5707ecf3 1314 r = value ? parse_boolean(value) : true;
d581d9d9 1315 if (r < 0)
5707ecf3
ZJS
1316 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1317 else
1318 s->forward_to_syslog = r;
1d84ad94
LP
1319
1320 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1321
5707ecf3
ZJS
1322 r = value ? parse_boolean(value) : true;
1323 if (r < 0)
1324 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1325 else
1326 s->forward_to_kmsg = r;
1d84ad94
LP
1327
1328 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1329
5707ecf3
ZJS
1330 r = value ? parse_boolean(value) : true;
1331 if (r < 0)
1332 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1333 else
1334 s->forward_to_console = r;
1d84ad94
LP
1335
1336 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1337
5707ecf3
ZJS
1338 r = value ? parse_boolean(value) : true;
1339 if (r < 0)
1340 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1341 else
1342 s->forward_to_wall = r;
1d84ad94
LP
1343
1344 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1345
1346 if (proc_cmdline_value_missing(key, value))
1347 return 0;
1348
5707ecf3
ZJS
1349 r = log_level_from_string(value);
1350 if (r < 0)
1351 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1352 else
1353 s->max_level_console = r;
1d84ad94
LP
1354
1355 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1356
1357 if (proc_cmdline_value_missing(key, value))
1358 return 0;
1359
5707ecf3
ZJS
1360 r = log_level_from_string(value);
1361 if (r < 0)
1362 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1363 else
1364 s->max_level_store = r;
1d84ad94
LP
1365
1366 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1367
1368 if (proc_cmdline_value_missing(key, value))
1369 return 0;
1370
5707ecf3
ZJS
1371 r = log_level_from_string(value);
1372 if (r < 0)
1373 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1374 else
1375 s->max_level_syslog = r;
1d84ad94
LP
1376
1377 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1378
1379 if (proc_cmdline_value_missing(key, value))
1380 return 0;
1381
5707ecf3
ZJS
1382 r = log_level_from_string(value);
1383 if (r < 0)
1384 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1385 else
1386 s->max_level_kmsg = r;
1d84ad94
LP
1387
1388 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1389
1390 if (proc_cmdline_value_missing(key, value))
1391 return 0;
1392
5707ecf3
ZJS
1393 r = log_level_from_string(value);
1394 if (r < 0)
1395 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1396 else
1397 s->max_level_wall = r;
1d84ad94 1398
5707ecf3
ZJS
1399 } else if (startswith(key, "systemd.journald"))
1400 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1401
804ae586 1402 /* do not warn about state here, since probably systemd already did */
db91ea32 1403 return 0;
d025f1e4
ZJS
1404}
1405
1406static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1407 assert(s);
1408
43688c49 1409 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1410 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1411 "Journal\0",
1412 config_item_perf_lookup, journald_gperf_lookup,
bcde742e 1413 CONFIG_PARSE_WARN, s);
d025f1e4
ZJS
1414}
1415
f9a810be
LP
1416static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1417 Server *s = userdata;
26687bf8
OS
1418
1419 assert(s);
1420
f9a810be 1421 server_sync(s);
26687bf8
OS
1422 return 0;
1423}
1424
d07f7b9e 1425int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1426 int r;
1427
26687bf8
OS
1428 assert(s);
1429
d07f7b9e
LP
1430 if (priority <= LOG_CRIT) {
1431 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1432 server_sync(s);
1433 return 0;
1434 }
1435
26687bf8
OS
1436 if (s->sync_scheduled)
1437 return 0;
1438
f9a810be
LP
1439 if (s->sync_interval_usec > 0) {
1440 usec_t when;
ca267016 1441
6a0f1f6d 1442 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1443 if (r < 0)
1444 return r;
26687bf8 1445
f9a810be
LP
1446 when += s->sync_interval_usec;
1447
1448 if (!s->sync_event_source) {
6a0f1f6d
LP
1449 r = sd_event_add_time(
1450 s->event,
1451 &s->sync_event_source,
1452 CLOCK_MONOTONIC,
1453 when, 0,
1454 server_dispatch_sync, s);
f9a810be
LP
1455 if (r < 0)
1456 return r;
1457
1458 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1459 } else {
1460 r = sd_event_source_set_time(s->sync_event_source, when);
1461 if (r < 0)
1462 return r;
1463
1464 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1465 }
26687bf8 1466 if (r < 0)
f9a810be 1467 return r;
26687bf8 1468
f9a810be
LP
1469 s->sync_scheduled = true;
1470 }
26687bf8
OS
1471
1472 return 0;
1473}
1474
0c24bb23
LP
1475static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1476 Server *s = userdata;
1477
1478 assert(s);
1479
1480 server_cache_hostname(s);
1481 return 0;
1482}
1483
1484static int server_open_hostname(Server *s) {
1485 int r;
1486
1487 assert(s);
1488
db4a47e9
LP
1489 s->hostname_fd = open("/proc/sys/kernel/hostname",
1490 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
4a62c710
MS
1491 if (s->hostname_fd < 0)
1492 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1493
151b9b96 1494 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1495 if (r < 0) {
28def94c
DR
1496 /* kernels prior to 3.2 don't support polling this file. Ignore
1497 * the failure. */
1498 if (r == -EPERM) {
e53fc357 1499 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1500 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1501 return 0;
1502 }
1503
23bbb0de 1504 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1505 }
1506
1507 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1508 if (r < 0)
1509 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1510
1511 return 0;
1512}
1513
e22aa3d3
LP
1514static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1515 Server *s = userdata;
1516 int r;
1517
1518 assert(s);
1519 assert(s->notify_event_source == es);
1520 assert(s->notify_fd == fd);
1521
e22aa3d3 1522 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1523 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1524 * READY=1 event or an stdout stream event. If there's nothing
1525 * to write anymore, turn our event source off. The next time
1526 * there's something to send it will be turned on again. */
e22aa3d3
LP
1527
1528 if (!s->sent_notify_ready) {
1529 static const char p[] =
1530 "READY=1\n"
1531 "STATUS=Processing requests...";
1532 ssize_t l;
1533
1534 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1535 if (l < 0) {
1536 if (errno == EAGAIN)
1537 return 0;
1538
1539 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1540 }
1541
1542 s->sent_notify_ready = true;
1543 log_debug("Sent READY=1 notification.");
1544
119e9655
LP
1545 } else if (s->send_watchdog) {
1546
1547 static const char p[] =
1548 "WATCHDOG=1";
1549
1550 ssize_t l;
1551
1552 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1553 if (l < 0) {
1554 if (errno == EAGAIN)
1555 return 0;
1556
1557 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1558 }
1559
1560 s->send_watchdog = false;
1561 log_debug("Sent WATCHDOG=1 notification.");
1562
e22aa3d3
LP
1563 } else if (s->stdout_streams_notify_queue)
1564 /* Dispatch one stream notification event */
1565 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1566
61233823 1567 /* Leave us enabled if there's still more to do. */
119e9655 1568 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1569 return 0;
1570
1571 /* There was nothing to do anymore, let's turn ourselves off. */
1572 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1573 if (r < 0)
1574 return log_error_errno(r, "Failed to turn off notify event source: %m");
1575
1576 return 0;
1577}
1578
119e9655
LP
1579static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1580 Server *s = userdata;
1581 int r;
1582
1583 assert(s);
1584
1585 s->send_watchdog = true;
1586
1587 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1588 if (r < 0)
1589 log_warning_errno(r, "Failed to turn on notify event source: %m");
1590
1591 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1592 if (r < 0)
1593 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1594
1595 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1596 if (r < 0)
1597 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1598
1599 return 0;
1600}
1601
e22aa3d3
LP
1602static int server_connect_notify(Server *s) {
1603 union sockaddr_union sa = {
1604 .un.sun_family = AF_UNIX,
1605 };
1606 const char *e;
1607 int r;
1608
1609 assert(s);
1610 assert(s->notify_fd < 0);
1611 assert(!s->notify_event_source);
1612
1613 /*
1614 So here's the problem: we'd like to send notification
1615 messages to PID 1, but we cannot do that via sd_notify(),
1616 since that's synchronous, and we might end up blocking on
1617 it. Specifically: given that PID 1 might block on
1618 dbus-daemon during IPC, and dbus-daemon is logging to us,
1619 and might hence block on us, we might end up in a deadlock
ccddd104 1620 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1621 generating a full blocking circle. To avoid this, let's
1622 create a non-blocking socket, and connect it to the
1623 notification socket, and then wait for POLLOUT before we
1624 send anything. This should efficiently avoid any deadlocks,
1625 as we'll never block on PID 1, hence PID 1 can safely block
1626 on dbus-daemon which can safely block on us again.
1627
1628 Don't think that this issue is real? It is, see:
1629 https://github.com/systemd/systemd/issues/1505
1630 */
1631
1632 e = getenv("NOTIFY_SOCKET");
1633 if (!e)
1634 return 0;
1635
4c701096 1636 if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
e22aa3d3
LP
1637 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1638 return -EINVAL;
1639 }
1640
1641 if (strlen(e) > sizeof(sa.un.sun_path)) {
1642 log_error("NOTIFY_SOCKET path too long: %s", e);
1643 return -EINVAL;
1644 }
1645
1646 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1647 if (s->notify_fd < 0)
1648 return log_error_errno(errno, "Failed to create notify socket: %m");
1649
1650 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1651
1652 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1653 if (sa.un.sun_path[0] == '@')
1654 sa.un.sun_path[0] = 0;
1655
fc2fffe7 1656 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1657 if (r < 0)
1658 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1659
1660 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1661 if (r < 0)
1662 return log_error_errno(r, "Failed to watch notification socket: %m");
1663
119e9655
LP
1664 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1665 s->send_watchdog = true;
1666
4de2402b 1667 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1668 if (r < 0)
1669 return log_error_errno(r, "Failed to add watchdog time event: %m");
1670 }
1671
e22aa3d3
LP
1672 /* This should fire pretty soon, which we'll use to send the
1673 * READY=1 event. */
1674
1675 return 0;
1676}
1677
d025f1e4 1678int server_init(Server *s) {
13790add 1679 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1680 int n, r, fd;
7d18d348 1681 bool no_sockets;
d025f1e4
ZJS
1682
1683 assert(s);
1684
1685 zero(*s);
e22aa3d3 1686 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1b7cf0e5
AG
1687 s->compress.enabled = true;
1688 s->compress.threshold_bytes = (uint64_t) -1;
d025f1e4 1689 s->seal = true;
b2392ff3 1690 s->read_kmsg = true;
d025f1e4 1691
119e9655
LP
1692 s->watchdog_usec = USEC_INFINITY;
1693
26687bf8
OS
1694 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1695 s->sync_scheduled = false;
1696
d025f1e4
ZJS
1697 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1698 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1699
40b71e89 1700 s->forward_to_wall = true;
d025f1e4 1701
e150e820
MB
1702 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1703
d025f1e4
ZJS
1704 s->max_level_store = LOG_DEBUG;
1705 s->max_level_syslog = LOG_DEBUG;
1706 s->max_level_kmsg = LOG_NOTICE;
1707 s->max_level_console = LOG_INFO;
40b71e89 1708 s->max_level_wall = LOG_EMERG;
d025f1e4 1709
ec20fe5f
LP
1710 s->line_max = DEFAULT_LINE_MAX;
1711
266a4700
FB
1712 journal_reset_metrics(&s->system_storage.metrics);
1713 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1714
1715 server_parse_config_file(s);
1d84ad94
LP
1716
1717 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1718 if (r < 0)
1719 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1720
d288f79f 1721 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1722 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1723 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1724 s->rate_limit_interval = s->rate_limit_burst = 0;
1725 }
d025f1e4 1726
8580d1f7 1727 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1728
43cf8388 1729 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1730 if (!s->user_journals)
1731 return log_oom();
1732
1733 s->mmap = mmap_cache_new();
1734 if (!s->mmap)
1735 return log_oom();
1736
b58c888f
VC
1737 s->deferred_closes = set_new(NULL);
1738 if (!s->deferred_closes)
1739 return log_oom();
1740
f9a810be 1741 r = sd_event_default(&s->event);
23bbb0de
MS
1742 if (r < 0)
1743 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1744
1745 n = sd_listen_fds(true);
23bbb0de
MS
1746 if (n < 0)
1747 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1748
1749 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1750
1751 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1752
1753 if (s->native_fd >= 0) {
1754 log_error("Too many native sockets passed.");
1755 return -EINVAL;
1756 }
1757
1758 s->native_fd = fd;
1759
1760 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1761
1762 if (s->stdout_fd >= 0) {
1763 log_error("Too many stdout sockets passed.");
1764 return -EINVAL;
1765 }
1766
1767 s->stdout_fd = fd;
1768
03ee5c38
LP
1769 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1770 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1771
1772 if (s->syslog_fd >= 0) {
1773 log_error("Too many /dev/log sockets passed.");
1774 return -EINVAL;
1775 }
1776
1777 s->syslog_fd = fd;
1778
875c2e22
LP
1779 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1780
1781 if (s->audit_fd >= 0) {
1782 log_error("Too many audit sockets passed.");
1783 return -EINVAL;
1784 }
1785
1786 s->audit_fd = fd;
1787
4ec3cd73 1788 } else {
4ec3cd73 1789
13790add
LP
1790 if (!fds) {
1791 fds = fdset_new();
1792 if (!fds)
1793 return log_oom();
1794 }
4ec3cd73 1795
13790add
LP
1796 r = fdset_put(fds, fd);
1797 if (r < 0)
1798 return log_oom();
4ec3cd73 1799 }
d025f1e4
ZJS
1800 }
1801
15d91bff
ZJS
1802 /* Try to restore streams, but don't bother if this fails */
1803 (void) server_restore_streams(s, fds);
d025f1e4 1804
13790add
LP
1805 if (fdset_size(fds) > 0) {
1806 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1807 fds = fdset_free(fds);
1808 }
1809
7d18d348
ZJS
1810 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1811
1812 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1813
1814 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1815 r = server_open_stdout_socket(s);
1816 if (r < 0)
1817 return r;
1818
37b7affe 1819 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1820 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1821 if (r < 0)
1822 return r;
1823
37b7affe 1824 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1825 r = server_open_native_socket(s);
d025f1e4
ZJS
1826 if (r < 0)
1827 return r;
1828
b2392ff3 1829 /* /dev/kmsg */
d025f1e4
ZJS
1830 r = server_open_dev_kmsg(s);
1831 if (r < 0)
1832 return r;
1833
7d18d348
ZJS
1834 /* Unless we got *some* sockets and not audit, open audit socket */
1835 if (s->audit_fd >= 0 || no_sockets) {
1836 r = server_open_audit(s);
1837 if (r < 0)
1838 return r;
1839 }
875c2e22 1840
d025f1e4
ZJS
1841 r = server_open_kernel_seqnum(s);
1842 if (r < 0)
1843 return r;
1844
0c24bb23
LP
1845 r = server_open_hostname(s);
1846 if (r < 0)
1847 return r;
1848
f9a810be 1849 r = setup_signals(s);
d025f1e4
ZJS
1850 if (r < 0)
1851 return r;
1852
1853 s->udev = udev_new();
1854 if (!s->udev)
1855 return -ENOMEM;
1856
f9a810be 1857 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1858 if (!s->rate_limit)
1859 return -ENOMEM;
1860
e9174f29
LP
1861 r = cg_get_root_path(&s->cgroup_root);
1862 if (r < 0)
1863 return r;
1864
0c24bb23
LP
1865 server_cache_hostname(s);
1866 server_cache_boot_id(s);
1867 server_cache_machine_id(s);
1868
266a4700
FB
1869 s->runtime_storage.name = "Runtime journal";
1870 s->system_storage.name = "System journal";
1871
605405c6
ZJS
1872 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1873 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1874 if (!s->runtime_storage.path || !s->system_storage.path)
1875 return -ENOMEM;
1876
e22aa3d3
LP
1877 (void) server_connect_notify(s);
1878
22e3a02b
LP
1879 (void) client_context_acquire_default(s);
1880
804ae586 1881 return system_journal_open(s, false);
d025f1e4
ZJS
1882}
1883
1884void server_maybe_append_tags(Server *s) {
349cc4a5 1885#if HAVE_GCRYPT
d025f1e4
ZJS
1886 JournalFile *f;
1887 Iterator i;
1888 usec_t n;
1889
1890 n = now(CLOCK_REALTIME);
1891
1892 if (s->system_journal)
1893 journal_file_maybe_append_tag(s->system_journal, n);
1894
43cf8388 1895 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1896 journal_file_maybe_append_tag(f, n);
1897#endif
1898}
1899
1900void server_done(Server *s) {
d025f1e4
ZJS
1901 assert(s);
1902
f9168190 1903 set_free_with_destructor(s->deferred_closes, journal_file_close);
b58c888f 1904
d025f1e4
ZJS
1905 while (s->stdout_streams)
1906 stdout_stream_free(s->stdout_streams);
1907
22e3a02b
LP
1908 client_context_flush_all(s);
1909
d025f1e4 1910 if (s->system_journal)
69a3a6fd 1911 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1912
1913 if (s->runtime_journal)
69a3a6fd 1914 (void) journal_file_close(s->runtime_journal);
d025f1e4 1915
f9168190 1916 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
d025f1e4 1917
f9a810be
LP
1918 sd_event_source_unref(s->syslog_event_source);
1919 sd_event_source_unref(s->native_event_source);
1920 sd_event_source_unref(s->stdout_event_source);
1921 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1922 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1923 sd_event_source_unref(s->sync_event_source);
1924 sd_event_source_unref(s->sigusr1_event_source);
1925 sd_event_source_unref(s->sigusr2_event_source);
1926 sd_event_source_unref(s->sigterm_event_source);
1927 sd_event_source_unref(s->sigint_event_source);
94b65516 1928 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1929 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1930 sd_event_source_unref(s->notify_event_source);
119e9655 1931 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1932 sd_event_unref(s->event);
d025f1e4 1933
03e334a1
LP
1934 safe_close(s->syslog_fd);
1935 safe_close(s->native_fd);
1936 safe_close(s->stdout_fd);
1937 safe_close(s->dev_kmsg_fd);
875c2e22 1938 safe_close(s->audit_fd);
03e334a1 1939 safe_close(s->hostname_fd);
e22aa3d3 1940 safe_close(s->notify_fd);
0c24bb23 1941
d025f1e4
ZJS
1942 if (s->rate_limit)
1943 journal_rate_limit_free(s->rate_limit);
1944
1945 if (s->kernel_seqnum)
1946 munmap(s->kernel_seqnum, sizeof(uint64_t));
1947
1948 free(s->buffer);
1949 free(s->tty_path);
e9174f29 1950 free(s->cgroup_root);
99d0966e 1951 free(s->hostname_field);
c6e9e16f
ZJS
1952 free(s->runtime_storage.path);
1953 free(s->system_storage.path);
d025f1e4
ZJS
1954
1955 if (s->mmap)
1956 mmap_cache_unref(s->mmap);
1957
3e044c49 1958 udev_unref(s->udev);
d025f1e4 1959}
8580d1f7
LP
1960
1961static const char* const storage_table[_STORAGE_MAX] = {
1962 [STORAGE_AUTO] = "auto",
1963 [STORAGE_VOLATILE] = "volatile",
1964 [STORAGE_PERSISTENT] = "persistent",
1965 [STORAGE_NONE] = "none"
1966};
1967
1968DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1969DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1970
1971static const char* const split_mode_table[_SPLIT_MAX] = {
1972 [SPLIT_LOGIN] = "login",
1973 [SPLIT_UID] = "uid",
1974 [SPLIT_NONE] = "none",
1975};
1976
1977DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1978DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1979
1980int config_parse_line_max(
1981 const char* unit,
1982 const char *filename,
1983 unsigned line,
1984 const char *section,
1985 unsigned section_line,
1986 const char *lvalue,
1987 int ltype,
1988 const char *rvalue,
1989 void *data,
1990 void *userdata) {
1991
1992 size_t *sz = data;
1993 int r;
1994
1995 assert(filename);
1996 assert(lvalue);
1997 assert(rvalue);
1998 assert(data);
1999
2000 if (isempty(rvalue))
2001 /* Empty assignment means default */
2002 *sz = DEFAULT_LINE_MAX;
2003 else {
2004 uint64_t v;
2005
2006 r = parse_size(rvalue, 1024, &v);
2007 if (r < 0) {
2008 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2009 return 0;
2010 }
2011
2012 if (v < 79) {
2013 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2014 * terminal size is 80ch, and it might make sense to break one character before the natural
2015 * line break would occur on that. */
2016 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2017 *sz = 79;
2018 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2019 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2020 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2021 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2022 * fail much earlier anyway. */
2023 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2024 *sz = SSIZE_MAX-1;
2025 } else
2026 *sz = (size_t) v;
2027 }
2028
2029 return 0;
2030}
1b7cf0e5
AG
2031
2032int config_parse_compress(const char* unit,
2033 const char *filename,
2034 unsigned line,
2035 const char *section,
2036 unsigned section_line,
2037 const char *lvalue,
2038 int ltype,
2039 const char *rvalue,
2040 void *data,
2041 void *userdata) {
2042 JournalCompressOptions* compress = data;
2043 int r;
2044
2045 if (streq(rvalue, "1")) {
2046 log_syntax(unit, LOG_WARNING, filename, line, 0,
2047 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2048 compress->enabled = true;
2049 } else if (streq(rvalue, "0")) {
2050 log_syntax(unit, LOG_WARNING, filename, line, 0,
2051 "Compress= ambiguously specified as 0, disabling compression");
2052 compress->enabled = false;
2053 } else if ((r = parse_boolean(rvalue)) >= 0)
2054 compress->enabled = r;
2055 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2056 compress->enabled = true;
2057 else if (isempty(rvalue)) {
2058 compress->enabled = true;
2059 compress->threshold_bytes = (uint64_t) -1;
2060 } else
2061 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2062
2063 return 0;
2064}