]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
socket-util: add sockaddr_un_set_path() helper
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
d025f1e4 2
349cc4a5 3#if HAVE_SELINUX
24882e06
LP
4#include <selinux/selinux.h>
5#endif
8580d1f7
LP
6#include <sys/ioctl.h>
7#include <sys/mman.h>
8#include <sys/signalfd.h>
9#include <sys/statvfs.h>
07630cea 10#include <linux/sockios.h>
24882e06 11
8580d1f7 12#include "sd-daemon.h"
74df0fca
LP
13#include "sd-journal.h"
14#include "sd-messages.h"
8580d1f7
LP
15
16#include "acl-util.h"
b5efdb8a 17#include "alloc-util.h"
430f0182 18#include "audit-util.h"
d025f1e4 19#include "cgroup-util.h"
d025f1e4 20#include "conf-parser.h"
a0956174 21#include "dirent-util.h"
0dec689b 22#include "extract-word.h"
3ffd4af2 23#include "fd-util.h"
33d52ab9 24#include "fileio.h"
f97b34a6 25#include "format-util.h"
f4f15635 26#include "fs-util.h"
8580d1f7 27#include "hashmap.h"
958b66ea 28#include "hostname-util.h"
4b58153d 29#include "id128-util.h"
afc5dbf3 30#include "io-util.h"
8580d1f7
LP
31#include "journal-authenticate.h"
32#include "journal-file.h"
d025f1e4
ZJS
33#include "journal-internal.h"
34#include "journal-vacuum.h"
8580d1f7 35#include "journald-audit.h"
22e3a02b 36#include "journald-context.h"
d025f1e4 37#include "journald-kmsg.h"
d025f1e4 38#include "journald-native.h"
8580d1f7 39#include "journald-rate-limit.h"
3ffd4af2 40#include "journald-server.h"
8580d1f7
LP
41#include "journald-stream.h"
42#include "journald-syslog.h"
4b58153d 43#include "log.h"
07630cea
LP
44#include "missing.h"
45#include "mkdir.h"
6bedfcbb 46#include "parse-util.h"
4e731273 47#include "proc-cmdline.h"
07630cea
LP
48#include "process-util.h"
49#include "rm-rf.h"
50#include "selinux-util.h"
51#include "signal-util.h"
52#include "socket-util.h"
32917e33 53#include "stdio-util.h"
8b43440b 54#include "string-table.h"
07630cea 55#include "string-util.h"
863a5610 56#include "syslog-util.h"
22e3a02b 57#include "user-util.h"
d025f1e4 58
d025f1e4
ZJS
/* Maximum number of per-user journal files kept open at once; older ones are closed when this is reached */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults for the sync interval, rate limiting and maximum journal file age */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* Cached disk space information younger than this is reused rather than recomputed */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
76
e0ed6db9
FB
77static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
78 _cleanup_closedir_ DIR *d = NULL;
79 struct dirent *de;
80 struct statvfs ss;
e0ed6db9
FB
81
82 assert(ret_used);
83 assert(ret_free);
84
266a4700 85 d = opendir(path);
e0ed6db9
FB
86 if (!d)
87 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 88 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
89
90 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 91 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
92
93 *ret_free = ss.f_bsize * ss.f_bavail;
94 *ret_used = 0;
95 FOREACH_DIRENT_ALL(de, d, break) {
96 struct stat st;
97
98 if (!endswith(de->d_name, ".journal") &&
99 !endswith(de->d_name, ".journal~"))
100 continue;
101
102 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 103 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
104 continue;
105 }
106
107 if (!S_ISREG(st.st_mode))
108 continue;
109
110 *ret_used += (uint64_t) st.st_blocks * 512UL;
111 }
112
113 return 0;
114}
115
a0edc477 116static void cache_space_invalidate(JournalStorageSpace *space) {
67319249 117 zero(*space);
a0edc477
FB
118}
119
57f443a6 120static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 121 JournalStorageSpace *space;
266a4700 122 JournalMetrics *metrics;
23aba343 123 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 124 usec_t ts;
e0ed6db9 125 int r;
d025f1e4 126
8580d1f7 127 assert(s);
266a4700 128
266a4700 129 metrics = &storage->metrics;
23aba343 130 space = &storage->space;
d025f1e4 131
8580d1f7 132 ts = now(CLOCK_MONOTONIC);
d025f1e4 133
3099caf2 134 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
135 return 0;
136
23aba343 137 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
138 if (r < 0)
139 return r;
d025f1e4 140
23aba343
FB
141 space->vfs_used = vfs_used;
142 space->vfs_available = vfs_avail;
143
144 avail = LESS_BY(vfs_avail, metrics->keep_free);
145
23aba343
FB
146 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
147 space->available = LESS_BY(space->limit, vfs_used);
148 space->timestamp = ts;
8580d1f7
LP
149 return 1;
150}
151
3a19f215
FB
152static void patch_min_use(JournalStorage *storage) {
153 assert(storage);
154
155 /* Let's bump the min_use limit to the current usage on disk. We do
156 * this when starting up and first opening the journal files. This way
157 * sudden spikes in disk usage will not cause journald to vacuum files
158 * without bounds. Note that this means that only a restart of journald
159 * will make it reset this value. */
160
161 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
162}
163
3a19f215 164static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 165 JournalStorage *js;
57f443a6 166 int r;
8580d1f7
LP
167
168 assert(s);
169
266a4700 170 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
171
172 r = cache_space_refresh(s, js);
173 if (r >= 0) {
174 if (available)
175 *available = js->space.available;
176 if (limit)
177 *limit = js->space.limit;
178 }
179 return r;
d025f1e4
ZJS
180}
181
cba5629e
FB
182void server_space_usage_message(Server *s, JournalStorage *storage) {
183 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
184 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
185 JournalMetrics *metrics;
cba5629e
FB
186
187 assert(s);
188
189 if (!storage)
190 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
191
57f443a6 192 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
193 return;
194
195 metrics = &storage->metrics;
23aba343 196 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
197 format_bytes(fb2, sizeof(fb2), metrics->max_use);
198 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 199 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
200 format_bytes(fb5, sizeof(fb5), storage->space.limit);
201 format_bytes(fb6, sizeof(fb6), storage->space.available);
202
13181942
LP
203 server_driver_message(s, 0,
204 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
205 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
206 storage->name, storage->path, fb1, fb5, fb6),
207 "JOURNAL_NAME=%s", storage->name,
208 "JOURNAL_PATH=%s", storage->path,
23aba343 209 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
210 "CURRENT_USE_PRETTY=%s", fb1,
211 "MAX_USE=%"PRIu64, metrics->max_use,
212 "MAX_USE_PRETTY=%s", fb2,
213 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
214 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 215 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
216 "DISK_AVAILABLE_PRETTY=%s", fb4,
217 "LIMIT=%"PRIu64, storage->space.limit,
218 "LIMIT_PRETTY=%s", fb5,
219 "AVAILABLE=%"PRIu64, storage->space.available,
220 "AVAILABLE_PRETTY=%s", fb6,
221 NULL);
222}
223
2fce06b0
LP
224static bool uid_for_system_journal(uid_t uid) {
225
226 /* Returns true if the specified UID shall get its data stored in the system journal*/
227
228 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
229}
230
5c3bde3f 231static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 232#if HAVE_ACL
5c3bde3f 233 int r;
d025f1e4 234#endif
d025f1e4
ZJS
235 assert(f);
236
349cc4a5 237#if HAVE_ACL
2fce06b0 238 if (uid_for_system_journal(uid))
d025f1e4
ZJS
239 return;
240
5c3bde3f
ZJS
241 r = add_acls_for_user(f->fd, uid);
242 if (r < 0)
243 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
244#endif
245}
246
7a24f3bf
VC
247static int open_journal(
248 Server *s,
249 bool reliably,
250 const char *fname,
251 int flags,
252 bool seal,
253 JournalMetrics *metrics,
7a24f3bf
VC
254 JournalFile **ret) {
255 int r;
e167d7fd 256 JournalFile *f;
7a24f3bf
VC
257
258 assert(s);
259 assert(fname);
260 assert(ret);
261
262 if (reliably)
1b7cf0e5
AG
263 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
264 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 265 else
1b7cf0e5
AG
266 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
267 metrics, s->mmap, s->deferred_closes, NULL, &f);
268
7a24f3bf
VC
269 if (r < 0)
270 return r;
271
e167d7fd 272 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 273 if (r < 0) {
69a3a6fd 274 (void) journal_file_close(f);
7a24f3bf
VC
275 return r;
276 }
277
e167d7fd 278 *ret = f;
7a24f3bf
VC
279 return r;
280}
281
/* Returns true if the flag file /run/systemd/journal/flushed exists (or, more precisely, if access()
 * with F_OK succeeds on it). */
static bool flushed_flag_is_set(void) {
        int r;

        r = access("/run/systemd/journal/flushed", F_OK);
        return r >= 0;
}
285
105bdb46
VC
286static int system_journal_open(Server *s, bool flush_requested) {
287 const char *fn;
288 int r = 0;
289
290 if (!s->system_journal &&
f78273c8
LP
291 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
292 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
293
294 /* If in auto mode: first try to create the machine
295 * path, but not the prefix.
296 *
297 * If in persistent mode: create /var/log/journal and
298 * the machine path */
299
300 if (s->storage == STORAGE_PERSISTENT)
301 (void) mkdir_p("/var/log/journal/", 0755);
302
266a4700 303 (void) mkdir(s->system_storage.path, 0755);
105bdb46 304
266a4700
FB
305 fn = strjoina(s->system_storage.path, "/system.journal");
306 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
307 if (r >= 0) {
308 server_add_acls(s->system_journal, 0);
57f443a6 309 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 310 patch_min_use(&s->system_storage);
29bfb683 311 } else {
4c701096 312 if (!IN_SET(r, -ENOENT, -EROFS))
105bdb46
VC
313 log_warning_errno(r, "Failed to open system journal: %m");
314
315 r = 0;
316 }
929eeb54
VC
317
318 /* If the runtime journal is open, and we're post-flush, we're
319 * recovering from a failed system journal rotate (ENOSPC)
320 * for which the runtime journal was reopened.
321 *
322 * Perform an implicit flush to var, leaving the runtime
323 * journal closed, now that the system journal is back.
324 */
f78273c8
LP
325 if (!flush_requested)
326 (void) server_flush_to_var(s, true);
105bdb46
VC
327 }
328
329 if (!s->runtime_journal &&
330 (s->storage != STORAGE_NONE)) {
331
266a4700 332 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
333
334 if (s->system_journal) {
335
336 /* Try to open the runtime journal, but only
337 * if it already exists, so that we can flush
338 * it into the system journal */
339
266a4700 340 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
341 if (r < 0) {
342 if (r != -ENOENT)
343 log_warning_errno(r, "Failed to open runtime journal: %m");
344
345 r = 0;
346 }
347
348 } else {
349
350 /* OK, we really need the runtime journal, so create
351 * it if necessary. */
352
353 (void) mkdir("/run/log", 0755);
354 (void) mkdir("/run/log/journal", 0755);
355 (void) mkdir_parents(fn, 0750);
356
266a4700 357 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
358 if (r < 0)
359 return log_error_errno(r, "Failed to open runtime journal: %m");
360 }
361
362 if (s->runtime_journal) {
363 server_add_acls(s->runtime_journal, 0);
57f443a6 364 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 365 patch_min_use(&s->runtime_storage);
105bdb46
VC
366 }
367 }
368
369 return r;
370}
371
d025f1e4 372static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 373 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
374 int r;
375 JournalFile *f;
376 sd_id128_t machine;
377
378 assert(s);
379
105bdb46
VC
380 /* A rotate that fails to create the new journal (ENOSPC) leaves the
381 * rotated journal as NULL. Unless we revisit opening, even after
382 * space is made available we'll continue to return NULL indefinitely.
383 *
384 * system_journal_open() is a noop if the journals are already open, so
385 * we can just call it here to recover from failed rotates (or anything
386 * else that's left the journals as NULL).
387 *
388 * Fixes https://github.com/systemd/systemd/issues/3968 */
389 (void) system_journal_open(s, false);
390
d025f1e4
ZJS
391 /* We split up user logs only on /var, not on /run. If the
392 * runtime file is open, we write to it exclusively, in order
393 * to guarantee proper order as soon as we flush /run to
394 * /var and close the runtime file. */
395
396 if (s->runtime_journal)
397 return s->runtime_journal;
398
2fce06b0 399 if (uid_for_system_journal(uid))
d025f1e4
ZJS
400 return s->system_journal;
401
402 r = sd_id128_get_machine(&machine);
403 if (r < 0)
404 return s->system_journal;
405
4a0b58c4 406 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
407 if (f)
408 return f;
409
de0671ee
ZJS
410 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
411 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
412 return s->system_journal;
413
43cf8388 414 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 415 /* Too many open? Then let's close one */
43cf8388 416 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 417 assert(f);
69a3a6fd 418 (void) journal_file_close(f);
d025f1e4
ZJS
419 }
420
266a4700 421 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
422 if (r < 0)
423 return s->system_journal;
424
5c3bde3f 425 server_add_acls(f, uid);
d025f1e4 426
4a0b58c4 427 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 428 if (r < 0) {
69a3a6fd 429 (void) journal_file_close(f);
d025f1e4
ZJS
430 return s->system_journal;
431 }
432
433 return f;
434}
435
ea69bd41
LP
436static int do_rotate(
437 Server *s,
438 JournalFile **f,
439 const char* name,
440 bool seal,
441 uint32_t uid) {
442
fc55baee
ZJS
443 int r;
444 assert(s);
445
446 if (!*f)
447 return -EINVAL;
448
1b7cf0e5 449 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
bb6b922f 450 if (r < 0) {
fc55baee 451 if (*f)
bb6b922f 452 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 453 else
bb6b922f
YW
454 return log_error_errno(r, "Failed to create new %s journal: %m", name);
455 }
456
457 server_add_acls(*f, uid);
2678031a 458
fc55baee
ZJS
459 return r;
460}
461
d025f1e4
ZJS
462void server_rotate(Server *s) {
463 JournalFile *f;
464 void *k;
465 Iterator i;
466 int r;
467
468 log_debug("Rotating...");
469
8580d1f7
LP
470 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
471 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 472
43cf8388 473 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 474 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 475 if (r >= 0)
43cf8388 476 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
477 else if (!f)
478 /* Old file has been closed and deallocated */
43cf8388 479 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 480 }
b58c888f
VC
481
482 /* Perform any deferred closes which aren't still offlining. */
483 SET_FOREACH(f, s->deferred_closes, i)
484 if (!journal_file_is_offlining(f)) {
485 (void) set_remove(s->deferred_closes, f);
486 (void) journal_file_close(f);
487 }
d025f1e4
ZJS
488}
489
26687bf8
OS
490void server_sync(Server *s) {
491 JournalFile *f;
26687bf8
OS
492 Iterator i;
493 int r;
494
26687bf8 495 if (s->system_journal) {
ac2e41f5 496 r = journal_file_set_offline(s->system_journal, false);
26687bf8 497 if (r < 0)
65089b82 498 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
499 }
500
65c1d46b 501 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 502 r = journal_file_set_offline(f, false);
26687bf8 503 if (r < 0)
65089b82 504 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
505 }
506
f9a810be
LP
507 if (s->sync_event_source) {
508 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
509 if (r < 0)
da927ba9 510 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 511 }
26687bf8
OS
512
513 s->sync_scheduled = false;
514}
515
3a19f215 516static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 517
63c8666b
ZJS
518 int r;
519
8580d1f7 520 assert(s);
266a4700 521 assert(storage);
8580d1f7 522
57f443a6 523 (void) cache_space_refresh(s, storage);
18e758bf
FB
524
525 if (verbose)
526 server_space_usage_message(s, storage);
8580d1f7 527
57f443a6
FB
528 r = journal_directory_vacuum(storage->path, storage->space.limit,
529 storage->metrics.n_max_files, s->max_retention_usec,
530 &s->oldest_file_usec, verbose);
63c8666b 531 if (r < 0 && r != -ENOENT)
266a4700
FB
532 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
533
a0edc477 534 cache_space_invalidate(&storage->space);
63c8666b
ZJS
535}
536
3a19f215 537int server_vacuum(Server *s, bool verbose) {
8580d1f7 538 assert(s);
d025f1e4
ZJS
539
540 log_debug("Vacuuming...");
541
542 s->oldest_file_usec = 0;
543
266a4700 544 if (s->system_journal)
3a19f215 545 do_vacuum(s, &s->system_storage, verbose);
266a4700 546 if (s->runtime_journal)
3a19f215 547 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 548
8580d1f7 549 return 0;
d025f1e4
ZJS
550}
551
0c24bb23
LP
552static void server_cache_machine_id(Server *s) {
553 sd_id128_t id;
554 int r;
555
556 assert(s);
557
558 r = sd_id128_get_machine(&id);
559 if (r < 0)
560 return;
561
562 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
563}
564
565static void server_cache_boot_id(Server *s) {
566 sd_id128_t id;
567 int r;
568
569 assert(s);
570
571 r = sd_id128_get_boot(&id);
572 if (r < 0)
573 return;
574
575 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
576}
577
578static void server_cache_hostname(Server *s) {
579 _cleanup_free_ char *t = NULL;
580 char *x;
581
582 assert(s);
583
584 t = gethostname_malloc();
585 if (!t)
586 return;
587
588 x = strappend("_HOSTNAME=", t);
589 if (!x)
590 return;
591
592 free(s->hostname_field);
593 s->hostname_field = x;
594}
595
8531ae70 596static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 597 switch(r) {
ae739cc1 598
6e1045e5
ZJS
599 case -E2BIG: /* Hit configured limit */
600 case -EFBIG: /* Hit fs limit */
601 case -EDQUOT: /* Quota limit hit */
602 case -ENOSPC: /* Disk full */
d025f1e4 603 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 604 return true;
ae739cc1 605
6e1045e5
ZJS
606 case -EIO: /* I/O error of some kind (mmap) */
607 log_warning("%s: IO error, rotating.", f->path);
608 return true;
ae739cc1 609
6e1045e5 610 case -EHOSTDOWN: /* Other machine */
d025f1e4 611 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 612 return true;
ae739cc1 613
6e1045e5 614 case -EBUSY: /* Unclean shutdown */
d025f1e4 615 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 616 return true;
ae739cc1 617
6e1045e5 618 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 619 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 620 return true;
ae739cc1 621
6e1045e5
ZJS
622 case -EBADMSG: /* Corrupted */
623 case -ENODATA: /* Truncated */
624 case -ESHUTDOWN: /* Already archived */
d025f1e4 625 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 626 return true;
ae739cc1 627
6e1045e5 628 case -EIDRM: /* Journal file has been deleted */
2678031a 629 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 630 return true;
ae739cc1
LP
631
632 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 633 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
634 return true;
635
6e1045e5 636 default:
d025f1e4 637 return false;
6e1045e5 638 }
d025f1e4
ZJS
639}
640
da6053d0 641static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
7c070017 642 bool vacuumed = false, rotate = false;
0f972d66 643 struct dual_timestamp ts;
d025f1e4 644 JournalFile *f;
d025f1e4
ZJS
645 int r;
646
647 assert(s);
648 assert(iovec);
649 assert(n > 0);
650
0f972d66
LP
651 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
652 * the source time, and not even the time the event was originally seen, but instead simply the time we started
653 * processing it, as we want strictly linear ordering in what we write out.) */
654 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
655 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
656
7c070017
LP
657 if (ts.realtime < s->last_realtime_clock) {
658 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
659 * regular operation. However, when it does happen, then we should make sure that we start fresh files
660 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
661 * bisection works correctly. */
d025f1e4 662
7c070017
LP
663 log_debug("Time jumped backwards, rotating.");
664 rotate = true;
665 } else {
666
667 f = find_journal(s, uid);
668 if (!f)
669 return;
670
671 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
672 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
673 rotate = true;
674 }
675 }
d025f1e4 676
7c070017 677 if (rotate) {
d025f1e4 678 server_rotate(s);
3a19f215 679 server_vacuum(s, false);
d025f1e4
ZJS
680 vacuumed = true;
681
682 f = find_journal(s, uid);
683 if (!f)
684 return;
685 }
686
7c070017
LP
687 s->last_realtime_clock = ts.realtime;
688
d180c349 689 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 690 if (r >= 0) {
d07f7b9e 691 server_schedule_sync(s, priority);
d025f1e4 692 return;
26687bf8 693 }
d025f1e4
ZJS
694
695 if (vacuumed || !shall_try_append_again(f, r)) {
da6053d0 696 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
697 return;
698 }
699
700 server_rotate(s);
3a19f215 701 server_vacuum(s, false);
d025f1e4
ZJS
702
703 f = find_journal(s, uid);
704 if (!f)
705 return;
706
707 log_debug("Retrying write.");
d180c349 708 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0 709 if (r < 0)
da6053d0 710 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
8266e1c0 711 else
d07f7b9e 712 server_schedule_sync(s, priority);
d025f1e4
ZJS
713}
714
22e3a02b
LP
/* Helpers that append one "FIELD=value" item to iovec[] and advance n, provided the value is set. Each
 * one formats into a stack buffer obtained via newa()/strjoina(), so the resulting item is only valid
 * until the calling function returns. 'field' must be a string literal without the trailing "=".
 *
 * All of them are wrapped in do { } while (0) so that they behave like a single statement, e.g. when
 * used as the body of an unbraced if/else. */

/* Appends "field=<value>" formatted with 'format' if isset(value) is true */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* Appends "field=<value>" if the string 'value' is neither NULL nor empty */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* Appends "field=<id>" if the 128bit ID 'value' is not the null ID */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* Appends "field=" followed by the first 'value_size' bytes of 'value' (NUL-terminated) if value_size > 0 */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0; \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)
4b58153d 745
d025f1e4
ZJS
746static void dispatch_message_real(
747 Server *s,
d3070fbd 748 struct iovec *iovec, size_t n, size_t m,
22e3a02b 749 const ClientContext *c,
3b3154df 750 const struct timeval *tv,
d07f7b9e 751 int priority,
22e3a02b
LP
752 pid_t object_pid) {
753
754 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
755 uid_t journal_uid;
756 ClientContext *o;
d025f1e4
ZJS
757
758 assert(s);
759 assert(iovec);
760 assert(n > 0);
d3070fbd
LP
761 assert(n +
762 N_IOVEC_META_FIELDS +
763 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
764 client_context_extra_fields_n_iovec(c) <= m);
19cace37 765
22e3a02b
LP
766 if (c) {
767 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
768 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
769 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 770
22e3a02b
LP
771 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
772 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
773 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
774 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 775
22e3a02b 776 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 777
22e3a02b
LP
778 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
779 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 780
22e3a02b
LP
781 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
782 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
784 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
785 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
786 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
787 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 788
22e3a02b 789 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d3070fbd
LP
790
791 if (c->extra_fields_n_iovec > 0) {
792 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
793 n += c->extra_fields_n_iovec;
794 }
d025f1e4 795 }
968f3196 796
22e3a02b 797 assert(n <= m);
968f3196 798
22e3a02b 799 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 800
22e3a02b
LP
801 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
802 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
803 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 804
22e3a02b
LP
805 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
806 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
807 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
808 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 809
22e3a02b 810 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 811
22e3a02b
LP
812 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
813 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 814
22e3a02b
LP
815 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
816 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
817 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
818 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
819 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
820 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
821 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 822
22e3a02b 823 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 824 }
22e3a02b 825
968f3196 826 assert(n <= m);
d025f1e4
ZJS
827
828 if (tv) {
398a50cd 829 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 830 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
831 }
832
833 /* Note that strictly speaking storing the boot id here is
834 * redundant since the entry includes this in-line
835 * anyway. However, we need this indexed, too. */
0c24bb23 836 if (!isempty(s->boot_id_field))
e6a7ec4b 837 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 838
0c24bb23 839 if (!isempty(s->machine_id_field))
e6a7ec4b 840 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 841
0c24bb23 842 if (!isempty(s->hostname_field))
e6a7ec4b 843 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
844
845 assert(n <= m);
846
22e3a02b
LP
847 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
848 /* Split up strictly by (non-root) UID */
849 journal_uid = c->uid;
850 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
851 /* Split up by login UIDs. We do this only if the
852 * realuid is not root, in order not to accidentally
853 * leak privileged information to the user that is
854 * logged by a privileged process that is part of an
7517e174 855 * unprivileged session. */
22e3a02b 856 journal_uid = c->owner_uid;
da499392
KS
857 else
858 journal_uid = 0;
759c945a 859
d07f7b9e 860 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
861}
862
13181942 863void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
22e3a02b 864
d3070fbd
LP
865 struct iovec *iovec;
866 size_t n = 0, k, m;
d025f1e4 867 va_list ap;
22e3a02b 868 int r;
d025f1e4
ZJS
869
870 assert(s);
871 assert(format);
872
f643ae71 873 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
d3070fbd
LP
874 iovec = newa(struct iovec, m);
875
4850d39a 876 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
877 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
878 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 879
e6a7ec4b 880 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 881 assert_cc(6 == LOG_INFO);
e6a7ec4b 882 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 883
2b044526 884 if (message_id)
e6a7ec4b 885 iovec[n++] = IOVEC_MAKE_STRING(message_id);
d3070fbd 886 k = n;
8a03c9ef
ZJS
887
888 va_start(ap, format);
d3070fbd 889 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
32917e33 890 /* Error handling below */
8a03c9ef
ZJS
891 va_end(ap);
892
32917e33 893 if (r >= 0)
d3070fbd 894 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
8a03c9ef 895
d3070fbd
LP
896 while (k < n)
897 free(iovec[k++].iov_base);
32917e33
ZJS
898
899 if (r < 0) {
900 /* We failed to format the message. Emit a warning instead. */
901 char buf[LINE_MAX];
902
903 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
904
905 n = 3;
e6a7ec4b
LP
906 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
907 iovec[n++] = IOVEC_MAKE_STRING(buf);
d3070fbd 908 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
32917e33 909 }
d025f1e4
ZJS
910}
911
912void server_dispatch_message(
913 Server *s,
d3070fbd 914 struct iovec *iovec, size_t n, size_t m,
22e3a02b 915 ClientContext *c,
3b3154df 916 const struct timeval *tv,
968f3196
ZJS
917 int priority,
918 pid_t object_pid) {
d025f1e4 919
8580d1f7 920 uint64_t available = 0;
22e3a02b 921 int rl;
d025f1e4
ZJS
922
923 assert(s);
924 assert(iovec || n == 0);
925
926 if (n == 0)
927 return;
928
929 if (LOG_PRI(priority) > s->max_level_store)
930 return;
931
2f5df74a
HHPF
932 /* Stop early in case the information will not be stored
933 * in a journal. */
934 if (s->storage == STORAGE_NONE)
935 return;
936
22e3a02b
LP
937 if (c && c->unit) {
938 (void) determine_space(s, &available, NULL);
d025f1e4 939
22e3a02b
LP
940 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
941 if (rl == 0)
942 return;
d025f1e4 943
22e3a02b
LP
944 /* Write a suppression message if we suppressed something */
945 if (rl > 1)
13181942
LP
946 server_driver_message(s, c->pid,
947 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
948 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
5908ff1c 949 "N_DROPPED=%i", rl - 1,
22e3a02b 950 NULL);
d025f1e4
ZJS
951 }
952
22e3a02b 953 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
954}
955
f78273c8 956int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
957 sd_id128_t machine;
958 sd_journal *j = NULL;
fbb63411
LP
959 char ts[FORMAT_TIMESPAN_MAX];
960 usec_t start;
961 unsigned n = 0;
962 int r;
d025f1e4
ZJS
963
964 assert(s);
965
f78273c8 966 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
967 return 0;
968
969 if (!s->runtime_journal)
970 return 0;
971
f78273c8
LP
972 if (require_flag_file && !flushed_flag_is_set())
973 return 0;
974
8580d1f7 975 (void) system_journal_open(s, true);
d025f1e4
ZJS
976
977 if (!s->system_journal)
978 return 0;
979
980 log_debug("Flushing to /var...");
981
fbb63411
LP
982 start = now(CLOCK_MONOTONIC);
983
d025f1e4 984 r = sd_id128_get_machine(&machine);
00a16861 985 if (r < 0)
d025f1e4 986 return r;
d025f1e4
ZJS
987
988 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
989 if (r < 0)
990 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 991
93b73b06
LP
992 sd_journal_set_data_threshold(j, 0);
993
d025f1e4
ZJS
994 SD_JOURNAL_FOREACH(j) {
995 Object *o = NULL;
996 JournalFile *f;
997
998 f = j->current_file;
999 assert(f && f->current_offset > 0);
1000
fbb63411
LP
1001 n++;
1002
d025f1e4
ZJS
1003 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1004 if (r < 0) {
da927ba9 1005 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1006 goto finish;
1007 }
1008
5a271b08 1009 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4
ZJS
1010 if (r >= 0)
1011 continue;
1012
1013 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1014 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1015 goto finish;
1016 }
1017
1018 server_rotate(s);
3a19f215 1019 server_vacuum(s, false);
d025f1e4 1020
253f59df
LP
1021 if (!s->system_journal) {
1022 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1023 r = -EIO;
1024 goto finish;
1025 }
1026
d025f1e4 1027 log_debug("Retrying write.");
5a271b08 1028 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
d025f1e4 1029 if (r < 0) {
da927ba9 1030 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1031 goto finish;
1032 }
1033 }
1034
804ae586
LP
1035 r = 0;
1036
d025f1e4 1037finish:
fd790d6f
RM
1038 if (s->system_journal)
1039 journal_file_post_change(s->system_journal);
d025f1e4 1040
804ae586 1041 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1042
1043 if (r >= 0)
c6878637 1044 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1045
763c7aa2 1046 sd_journal_close(j);
d025f1e4 1047
13181942 1048 server_driver_message(s, 0, NULL,
8a03c9ef
ZJS
1049 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1050 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1051 n),
1052 NULL);
fbb63411 1053
d025f1e4
ZJS
1054 return r;
1055}
1056
8531ae70 1057int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1058 Server *s = userdata;
a315ac4e
LP
1059 struct ucred *ucred = NULL;
1060 struct timeval *tv = NULL;
1061 struct cmsghdr *cmsg;
1062 char *label = NULL;
1063 size_t label_len = 0, m;
1064 struct iovec iovec;
1065 ssize_t n;
1066 int *fds = NULL, v = 0;
da6053d0 1067 size_t n_fds = 0;
a315ac4e
LP
1068
1069 union {
1070 struct cmsghdr cmsghdr;
1071
1072 /* We use NAME_MAX space for the SELinux label
1073 * here. The kernel currently enforces no
1074 * limit, but according to suggestions from
1075 * the SELinux people this will change and it
1076 * will probably be identical to NAME_MAX. For
1077 * now we use that, but this should be updated
1078 * one day when the final limit is known. */
1079 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1080 CMSG_SPACE(sizeof(struct timeval)) +
1081 CMSG_SPACE(sizeof(int)) + /* fd */
1082 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1083 } control = {};
1084
1085 union sockaddr_union sa = {};
1086
1087 struct msghdr msghdr = {
1088 .msg_iov = &iovec,
1089 .msg_iovlen = 1,
1090 .msg_control = &control,
1091 .msg_controllen = sizeof(control),
1092 .msg_name = &sa,
1093 .msg_namelen = sizeof(sa),
1094 };
f9a810be 1095
d025f1e4 1096 assert(s);
875c2e22 1097 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1098
1099 if (revents != EPOLLIN) {
1100 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1101 return -EIO;
1102 }
1103
22e3a02b
LP
1104 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1105 * it.) */
a315ac4e 1106 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1107
a315ac4e
LP
1108 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1109 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1110 (size_t) LINE_MAX,
1111 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1112
a315ac4e
LP
1113 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1114 return log_oom();
875c2e22 1115
a315ac4e
LP
1116 iovec.iov_base = s->buffer;
1117 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1118
a315ac4e
LP
1119 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1120 if (n < 0) {
3742095b 1121 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1122 return 0;
875c2e22 1123
a315ac4e
LP
1124 return log_error_errno(errno, "recvmsg() failed: %m");
1125 }
875c2e22 1126
a315ac4e
LP
1127 CMSG_FOREACH(cmsg, &msghdr) {
1128
1129 if (cmsg->cmsg_level == SOL_SOCKET &&
1130 cmsg->cmsg_type == SCM_CREDENTIALS &&
1131 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1132 ucred = (struct ucred*) CMSG_DATA(cmsg);
1133 else if (cmsg->cmsg_level == SOL_SOCKET &&
1134 cmsg->cmsg_type == SCM_SECURITY) {
1135 label = (char*) CMSG_DATA(cmsg);
1136 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1137 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1138 cmsg->cmsg_type == SO_TIMESTAMP &&
1139 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1140 tv = (struct timeval*) CMSG_DATA(cmsg);
1141 else if (cmsg->cmsg_level == SOL_SOCKET &&
1142 cmsg->cmsg_type == SCM_RIGHTS) {
1143 fds = (int*) CMSG_DATA(cmsg);
1144 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1145 }
a315ac4e 1146 }
d025f1e4 1147
a315ac4e
LP
1148 /* And a trailing NUL, just in case */
1149 s->buffer[n] = 0;
1150
1151 if (fd == s->syslog_fd) {
1152 if (n > 0 && n_fds == 0)
bb3ff70a 1153 server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
a315ac4e
LP
1154 else if (n_fds > 0)
1155 log_warning("Got file descriptors via syslog socket. Ignoring.");
1156
1157 } else if (fd == s->native_fd) {
1158 if (n > 0 && n_fds == 0)
1159 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1160 else if (n == 0 && n_fds == 1)
1161 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1162 else if (n_fds > 0)
1163 log_warning("Got too many file descriptors via native socket. Ignoring.");
1164
1165 } else {
1166 assert(fd == s->audit_fd);
1167
1168 if (n > 0 && n_fds == 0)
1169 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1170 else if (n_fds > 0)
1171 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1172 }
a315ac4e
LP
1173
1174 close_many(fds, n_fds);
1175 return 0;
f9a810be 1176}
d025f1e4 1177
f9a810be
LP
1178static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1179 Server *s = userdata;
33d52ab9 1180 int r;
d025f1e4 1181
f9a810be 1182 assert(s);
d025f1e4 1183
94b65516 1184 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1185
f78273c8 1186 (void) server_flush_to_var(s, false);
f9a810be 1187 server_sync(s);
3a19f215 1188 server_vacuum(s, false);
d025f1e4 1189
33d52ab9
LP
1190 r = touch("/run/systemd/journal/flushed");
1191 if (r < 0)
1192 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1193
18e758bf 1194 server_space_usage_message(s, NULL);
f9a810be
LP
1195 return 0;
1196}
d025f1e4 1197
f9a810be
LP
1198static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1199 Server *s = userdata;
33d52ab9 1200 int r;
d025f1e4 1201
f9a810be 1202 assert(s);
d025f1e4 1203
94b65516 1204 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1205 server_rotate(s);
3a19f215
FB
1206 server_vacuum(s, true);
1207
1208 if (s->system_journal)
1209 patch_min_use(&s->system_storage);
1210 if (s->runtime_journal)
1211 patch_min_use(&s->runtime_storage);
d025f1e4 1212
dbd6e31c 1213 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1214 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1215 if (r < 0)
1216 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1217
f9a810be
LP
1218 return 0;
1219}
d025f1e4 1220
f9a810be
LP
1221static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1222 Server *s = userdata;
d025f1e4 1223
f9a810be 1224 assert(s);
d025f1e4 1225
4daf54a8 1226 log_received_signal(LOG_INFO, si);
d025f1e4 1227
6203e07a 1228 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1229 return 0;
1230}
1231
94b65516
LP
1232static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1233 Server *s = userdata;
33d52ab9 1234 int r;
94b65516
LP
1235
1236 assert(s);
1237
1238 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1239
1240 server_sync(s);
1241
1242 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1243 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1244 if (r < 0)
1245 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1246
1247 return 0;
1248}
1249
f9a810be 1250static int setup_signals(Server *s) {
f9a810be 1251 int r;
d025f1e4
ZJS
1252
1253 assert(s);
1254
9bab3b65 1255 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1256
151b9b96 1257 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1258 if (r < 0)
1259 return r;
1260
151b9b96 1261 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1262 if (r < 0)
1263 return r;
d025f1e4 1264
151b9b96 1265 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1266 if (r < 0)
1267 return r;
d025f1e4 1268
b374689c
LP
1269 /* Let's process SIGTERM late, so that we flush all queued
1270 * messages to disk before we exit */
1271 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1272 if (r < 0)
1273 return r;
1274
1275 /* When journald is invoked on the terminal (when debugging),
1276 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1277 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1278 if (r < 0)
1279 return r;
d025f1e4 1280
b374689c
LP
1281 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1282 if (r < 0)
1283 return r;
1284
94b65516
LP
1285 /* SIGRTMIN+1 causes an immediate sync. We process this very
1286 * late, so that everything else queued at this point is
1287 * really written to disk. Clients can watch
1288 * /run/systemd/journal/synced with inotify until its mtime
1289 * changes to see when a sync happened. */
1290 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1291 if (r < 0)
1292 return r;
1293
1294 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1295 if (r < 0)
1296 return r;
1297
d025f1e4
ZJS
1298 return 0;
1299}
1300
5707ecf3
ZJS
1301static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1302 Server *s = data;
74df0fca 1303 int r;
d025f1e4 1304
5707ecf3 1305 assert(s);
d025f1e4 1306
1d84ad94
LP
1307 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1308
5707ecf3 1309 r = value ? parse_boolean(value) : true;
d581d9d9 1310 if (r < 0)
5707ecf3
ZJS
1311 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1312 else
1313 s->forward_to_syslog = r;
1d84ad94
LP
1314
1315 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1316
5707ecf3
ZJS
1317 r = value ? parse_boolean(value) : true;
1318 if (r < 0)
1319 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1320 else
1321 s->forward_to_kmsg = r;
1d84ad94
LP
1322
1323 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1324
5707ecf3
ZJS
1325 r = value ? parse_boolean(value) : true;
1326 if (r < 0)
1327 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1328 else
1329 s->forward_to_console = r;
1d84ad94
LP
1330
1331 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1332
5707ecf3
ZJS
1333 r = value ? parse_boolean(value) : true;
1334 if (r < 0)
1335 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1336 else
1337 s->forward_to_wall = r;
1d84ad94
LP
1338
1339 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1340
1341 if (proc_cmdline_value_missing(key, value))
1342 return 0;
1343
5707ecf3
ZJS
1344 r = log_level_from_string(value);
1345 if (r < 0)
1346 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1347 else
1348 s->max_level_console = r;
1d84ad94
LP
1349
1350 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1351
1352 if (proc_cmdline_value_missing(key, value))
1353 return 0;
1354
5707ecf3
ZJS
1355 r = log_level_from_string(value);
1356 if (r < 0)
1357 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1358 else
1359 s->max_level_store = r;
1d84ad94
LP
1360
1361 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1362
1363 if (proc_cmdline_value_missing(key, value))
1364 return 0;
1365
5707ecf3
ZJS
1366 r = log_level_from_string(value);
1367 if (r < 0)
1368 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1369 else
1370 s->max_level_syslog = r;
1d84ad94
LP
1371
1372 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1373
1374 if (proc_cmdline_value_missing(key, value))
1375 return 0;
1376
5707ecf3
ZJS
1377 r = log_level_from_string(value);
1378 if (r < 0)
1379 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1380 else
1381 s->max_level_kmsg = r;
1d84ad94
LP
1382
1383 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1384
1385 if (proc_cmdline_value_missing(key, value))
1386 return 0;
1387
5707ecf3
ZJS
1388 r = log_level_from_string(value);
1389 if (r < 0)
1390 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1391 else
1392 s->max_level_wall = r;
1d84ad94 1393
5707ecf3
ZJS
1394 } else if (startswith(key, "systemd.journald"))
1395 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1396
804ae586 1397 /* do not warn about state here, since probably systemd already did */
db91ea32 1398 return 0;
d025f1e4
ZJS
1399}
1400
1401static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1402 assert(s);
1403
43688c49 1404 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1405 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1406 "Journal\0",
1407 config_item_perf_lookup, journald_gperf_lookup,
bcde742e 1408 CONFIG_PARSE_WARN, s);
d025f1e4
ZJS
1409}
1410
f9a810be
LP
1411static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1412 Server *s = userdata;
26687bf8
OS
1413
1414 assert(s);
1415
f9a810be 1416 server_sync(s);
26687bf8
OS
1417 return 0;
1418}
1419
d07f7b9e 1420int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1421 int r;
1422
26687bf8
OS
1423 assert(s);
1424
d07f7b9e
LP
1425 if (priority <= LOG_CRIT) {
1426 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1427 server_sync(s);
1428 return 0;
1429 }
1430
26687bf8
OS
1431 if (s->sync_scheduled)
1432 return 0;
1433
f9a810be
LP
1434 if (s->sync_interval_usec > 0) {
1435 usec_t when;
ca267016 1436
6a0f1f6d 1437 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1438 if (r < 0)
1439 return r;
26687bf8 1440
f9a810be
LP
1441 when += s->sync_interval_usec;
1442
1443 if (!s->sync_event_source) {
6a0f1f6d
LP
1444 r = sd_event_add_time(
1445 s->event,
1446 &s->sync_event_source,
1447 CLOCK_MONOTONIC,
1448 when, 0,
1449 server_dispatch_sync, s);
f9a810be
LP
1450 if (r < 0)
1451 return r;
1452
1453 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1454 } else {
1455 r = sd_event_source_set_time(s->sync_event_source, when);
1456 if (r < 0)
1457 return r;
1458
1459 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1460 }
26687bf8 1461 if (r < 0)
f9a810be 1462 return r;
26687bf8 1463
f9a810be
LP
1464 s->sync_scheduled = true;
1465 }
26687bf8
OS
1466
1467 return 0;
1468}
1469
0c24bb23
LP
1470static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1471 Server *s = userdata;
1472
1473 assert(s);
1474
1475 server_cache_hostname(s);
1476 return 0;
1477}
1478
1479static int server_open_hostname(Server *s) {
1480 int r;
1481
1482 assert(s);
1483
db4a47e9
LP
1484 s->hostname_fd = open("/proc/sys/kernel/hostname",
1485 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
4a62c710
MS
1486 if (s->hostname_fd < 0)
1487 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1488
151b9b96 1489 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1490 if (r < 0) {
28def94c
DR
1491 /* kernels prior to 3.2 don't support polling this file. Ignore
1492 * the failure. */
1493 if (r == -EPERM) {
e53fc357 1494 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1495 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1496 return 0;
1497 }
1498
23bbb0de 1499 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1500 }
1501
1502 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1503 if (r < 0)
1504 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1505
1506 return 0;
1507}
1508
e22aa3d3
LP
1509static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1510 Server *s = userdata;
1511 int r;
1512
1513 assert(s);
1514 assert(s->notify_event_source == es);
1515 assert(s->notify_fd == fd);
1516
e22aa3d3 1517 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1518 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1519 * READY=1 event or an stdout stream event. If there's nothing
1520 * to write anymore, turn our event source off. The next time
1521 * there's something to send it will be turned on again. */
e22aa3d3
LP
1522
1523 if (!s->sent_notify_ready) {
1524 static const char p[] =
1525 "READY=1\n"
1526 "STATUS=Processing requests...";
1527 ssize_t l;
1528
1529 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1530 if (l < 0) {
1531 if (errno == EAGAIN)
1532 return 0;
1533
1534 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1535 }
1536
1537 s->sent_notify_ready = true;
1538 log_debug("Sent READY=1 notification.");
1539
119e9655
LP
1540 } else if (s->send_watchdog) {
1541
1542 static const char p[] =
1543 "WATCHDOG=1";
1544
1545 ssize_t l;
1546
1547 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1548 if (l < 0) {
1549 if (errno == EAGAIN)
1550 return 0;
1551
1552 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1553 }
1554
1555 s->send_watchdog = false;
1556 log_debug("Sent WATCHDOG=1 notification.");
1557
e22aa3d3
LP
1558 } else if (s->stdout_streams_notify_queue)
1559 /* Dispatch one stream notification event */
1560 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1561
61233823 1562 /* Leave us enabled if there's still more to do. */
119e9655 1563 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1564 return 0;
1565
1566 /* There was nothing to do anymore, let's turn ourselves off. */
1567 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1568 if (r < 0)
1569 return log_error_errno(r, "Failed to turn off notify event source: %m");
1570
1571 return 0;
1572}
1573
119e9655
LP
1574static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1575 Server *s = userdata;
1576 int r;
1577
1578 assert(s);
1579
1580 s->send_watchdog = true;
1581
1582 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1583 if (r < 0)
1584 log_warning_errno(r, "Failed to turn on notify event source: %m");
1585
1586 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1587 if (r < 0)
1588 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1589
1590 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1591 if (r < 0)
1592 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1593
1594 return 0;
1595}
1596
e22aa3d3
LP
1597static int server_connect_notify(Server *s) {
1598 union sockaddr_union sa = {
1599 .un.sun_family = AF_UNIX,
1600 };
1601 const char *e;
1602 int r;
1603
1604 assert(s);
1605 assert(s->notify_fd < 0);
1606 assert(!s->notify_event_source);
1607
1608 /*
1609 So here's the problem: we'd like to send notification
1610 messages to PID 1, but we cannot do that via sd_notify(),
1611 since that's synchronous, and we might end up blocking on
1612 it. Specifically: given that PID 1 might block on
1613 dbus-daemon during IPC, and dbus-daemon is logging to us,
1614 and might hence block on us, we might end up in a deadlock
ccddd104 1615 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1616 generating a full blocking circle. To avoid this, let's
1617 create a non-blocking socket, and connect it to the
1618 notification socket, and then wait for POLLOUT before we
1619 send anything. This should efficiently avoid any deadlocks,
1620 as we'll never block on PID 1, hence PID 1 can safely block
1621 on dbus-daemon which can safely block on us again.
1622
1623 Don't think that this issue is real? It is, see:
1624 https://github.com/systemd/systemd/issues/1505
1625 */
1626
1627 e = getenv("NOTIFY_SOCKET");
1628 if (!e)
1629 return 0;
1630
4c701096 1631 if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
e22aa3d3
LP
1632 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1633 return -EINVAL;
1634 }
1635
1636 if (strlen(e) > sizeof(sa.un.sun_path)) {
1637 log_error("NOTIFY_SOCKET path too long: %s", e);
1638 return -EINVAL;
1639 }
1640
1641 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1642 if (s->notify_fd < 0)
1643 return log_error_errno(errno, "Failed to create notify socket: %m");
1644
1645 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1646
1647 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1648 if (sa.un.sun_path[0] == '@')
1649 sa.un.sun_path[0] = 0;
1650
fc2fffe7 1651 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1652 if (r < 0)
1653 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1654
1655 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1656 if (r < 0)
1657 return log_error_errno(r, "Failed to watch notification socket: %m");
1658
119e9655
LP
1659 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1660 s->send_watchdog = true;
1661
4de2402b 1662 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1663 if (r < 0)
1664 return log_error_errno(r, "Failed to add watchdog time event: %m");
1665 }
1666
e22aa3d3
LP
1667 /* This should fire pretty soon, which we'll use to send the
1668 * READY=1 event. */
1669
1670 return 0;
1671}
1672
d025f1e4 1673int server_init(Server *s) {
13790add 1674 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1675 int n, r, fd;
7d18d348 1676 bool no_sockets;
d025f1e4
ZJS
1677
1678 assert(s);
1679
1680 zero(*s);
e22aa3d3 1681 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1b7cf0e5
AG
1682 s->compress.enabled = true;
1683 s->compress.threshold_bytes = (uint64_t) -1;
d025f1e4 1684 s->seal = true;
b2392ff3 1685 s->read_kmsg = true;
d025f1e4 1686
119e9655
LP
1687 s->watchdog_usec = USEC_INFINITY;
1688
26687bf8
OS
1689 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1690 s->sync_scheduled = false;
1691
d025f1e4
ZJS
1692 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1693 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1694
40b71e89 1695 s->forward_to_wall = true;
d025f1e4 1696
e150e820
MB
1697 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1698
d025f1e4
ZJS
1699 s->max_level_store = LOG_DEBUG;
1700 s->max_level_syslog = LOG_DEBUG;
1701 s->max_level_kmsg = LOG_NOTICE;
1702 s->max_level_console = LOG_INFO;
40b71e89 1703 s->max_level_wall = LOG_EMERG;
d025f1e4 1704
ec20fe5f
LP
1705 s->line_max = DEFAULT_LINE_MAX;
1706
266a4700
FB
1707 journal_reset_metrics(&s->system_storage.metrics);
1708 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1709
1710 server_parse_config_file(s);
1d84ad94
LP
1711
1712 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1713 if (r < 0)
1714 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1715
d288f79f 1716 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1717 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1718 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1719 s->rate_limit_interval = s->rate_limit_burst = 0;
1720 }
d025f1e4 1721
8580d1f7 1722 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1723
43cf8388 1724 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1725 if (!s->user_journals)
1726 return log_oom();
1727
1728 s->mmap = mmap_cache_new();
1729 if (!s->mmap)
1730 return log_oom();
1731
b58c888f
VC
1732 s->deferred_closes = set_new(NULL);
1733 if (!s->deferred_closes)
1734 return log_oom();
1735
f9a810be 1736 r = sd_event_default(&s->event);
23bbb0de
MS
1737 if (r < 0)
1738 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1739
1740 n = sd_listen_fds(true);
23bbb0de
MS
1741 if (n < 0)
1742 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1743
1744 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1745
1746 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1747
1748 if (s->native_fd >= 0) {
1749 log_error("Too many native sockets passed.");
1750 return -EINVAL;
1751 }
1752
1753 s->native_fd = fd;
1754
1755 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1756
1757 if (s->stdout_fd >= 0) {
1758 log_error("Too many stdout sockets passed.");
1759 return -EINVAL;
1760 }
1761
1762 s->stdout_fd = fd;
1763
03ee5c38
LP
1764 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1765 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1766
1767 if (s->syslog_fd >= 0) {
1768 log_error("Too many /dev/log sockets passed.");
1769 return -EINVAL;
1770 }
1771
1772 s->syslog_fd = fd;
1773
875c2e22
LP
1774 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1775
1776 if (s->audit_fd >= 0) {
1777 log_error("Too many audit sockets passed.");
1778 return -EINVAL;
1779 }
1780
1781 s->audit_fd = fd;
1782
4ec3cd73 1783 } else {
4ec3cd73 1784
13790add
LP
1785 if (!fds) {
1786 fds = fdset_new();
1787 if (!fds)
1788 return log_oom();
1789 }
4ec3cd73 1790
13790add
LP
1791 r = fdset_put(fds, fd);
1792 if (r < 0)
1793 return log_oom();
4ec3cd73 1794 }
d025f1e4
ZJS
1795 }
1796
15d91bff
ZJS
1797 /* Try to restore streams, but don't bother if this fails */
1798 (void) server_restore_streams(s, fds);
d025f1e4 1799
13790add
LP
1800 if (fdset_size(fds) > 0) {
1801 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1802 fds = fdset_free(fds);
1803 }
1804
7d18d348
ZJS
1805 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1806
1807 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1808
1809 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1810 r = server_open_stdout_socket(s);
1811 if (r < 0)
1812 return r;
1813
37b7affe 1814 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1815 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1816 if (r < 0)
1817 return r;
1818
37b7affe 1819 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1820 r = server_open_native_socket(s);
d025f1e4
ZJS
1821 if (r < 0)
1822 return r;
1823
b2392ff3 1824 /* /dev/kmsg */
d025f1e4
ZJS
1825 r = server_open_dev_kmsg(s);
1826 if (r < 0)
1827 return r;
1828
7d18d348
ZJS
1829 /* Unless we got *some* sockets and not audit, open audit socket */
1830 if (s->audit_fd >= 0 || no_sockets) {
1831 r = server_open_audit(s);
1832 if (r < 0)
1833 return r;
1834 }
875c2e22 1835
d025f1e4
ZJS
1836 r = server_open_kernel_seqnum(s);
1837 if (r < 0)
1838 return r;
1839
0c24bb23
LP
1840 r = server_open_hostname(s);
1841 if (r < 0)
1842 return r;
1843
f9a810be 1844 r = setup_signals(s);
d025f1e4
ZJS
1845 if (r < 0)
1846 return r;
1847
f9a810be 1848 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1849 if (!s->rate_limit)
1850 return -ENOMEM;
1851
e9174f29
LP
1852 r = cg_get_root_path(&s->cgroup_root);
1853 if (r < 0)
1854 return r;
1855
0c24bb23
LP
1856 server_cache_hostname(s);
1857 server_cache_boot_id(s);
1858 server_cache_machine_id(s);
1859
266a4700
FB
1860 s->runtime_storage.name = "Runtime journal";
1861 s->system_storage.name = "System journal";
1862
605405c6
ZJS
1863 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1864 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1865 if (!s->runtime_storage.path || !s->system_storage.path)
1866 return -ENOMEM;
1867
e22aa3d3
LP
1868 (void) server_connect_notify(s);
1869
22e3a02b
LP
1870 (void) client_context_acquire_default(s);
1871
804ae586 1872 return system_journal_open(s, false);
d025f1e4
ZJS
1873}
1874
1875void server_maybe_append_tags(Server *s) {
349cc4a5 1876#if HAVE_GCRYPT
d025f1e4
ZJS
1877 JournalFile *f;
1878 Iterator i;
1879 usec_t n;
1880
1881 n = now(CLOCK_REALTIME);
1882
1883 if (s->system_journal)
1884 journal_file_maybe_append_tag(s->system_journal, n);
1885
43cf8388 1886 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1887 journal_file_maybe_append_tag(f, n);
1888#endif
1889}
1890
1891void server_done(Server *s) {
d025f1e4
ZJS
1892 assert(s);
1893
f9168190 1894 set_free_with_destructor(s->deferred_closes, journal_file_close);
b58c888f 1895
d025f1e4
ZJS
1896 while (s->stdout_streams)
1897 stdout_stream_free(s->stdout_streams);
1898
22e3a02b
LP
1899 client_context_flush_all(s);
1900
d025f1e4 1901 if (s->system_journal)
69a3a6fd 1902 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1903
1904 if (s->runtime_journal)
69a3a6fd 1905 (void) journal_file_close(s->runtime_journal);
d025f1e4 1906
f9168190 1907 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
d025f1e4 1908
f9a810be
LP
1909 sd_event_source_unref(s->syslog_event_source);
1910 sd_event_source_unref(s->native_event_source);
1911 sd_event_source_unref(s->stdout_event_source);
1912 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1913 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1914 sd_event_source_unref(s->sync_event_source);
1915 sd_event_source_unref(s->sigusr1_event_source);
1916 sd_event_source_unref(s->sigusr2_event_source);
1917 sd_event_source_unref(s->sigterm_event_source);
1918 sd_event_source_unref(s->sigint_event_source);
94b65516 1919 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1920 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1921 sd_event_source_unref(s->notify_event_source);
119e9655 1922 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1923 sd_event_unref(s->event);
d025f1e4 1924
03e334a1
LP
1925 safe_close(s->syslog_fd);
1926 safe_close(s->native_fd);
1927 safe_close(s->stdout_fd);
1928 safe_close(s->dev_kmsg_fd);
875c2e22 1929 safe_close(s->audit_fd);
03e334a1 1930 safe_close(s->hostname_fd);
e22aa3d3 1931 safe_close(s->notify_fd);
0c24bb23 1932
d025f1e4
ZJS
1933 if (s->rate_limit)
1934 journal_rate_limit_free(s->rate_limit);
1935
1936 if (s->kernel_seqnum)
1937 munmap(s->kernel_seqnum, sizeof(uint64_t));
1938
1939 free(s->buffer);
1940 free(s->tty_path);
e9174f29 1941 free(s->cgroup_root);
99d0966e 1942 free(s->hostname_field);
c6e9e16f
ZJS
1943 free(s->runtime_storage.path);
1944 free(s->system_storage.path);
d025f1e4
ZJS
1945
1946 if (s->mmap)
1947 mmap_cache_unref(s->mmap);
d025f1e4 1948}
8580d1f7
LP
1949
1950static const char* const storage_table[_STORAGE_MAX] = {
1951 [STORAGE_AUTO] = "auto",
1952 [STORAGE_VOLATILE] = "volatile",
1953 [STORAGE_PERSISTENT] = "persistent",
1954 [STORAGE_NONE] = "none"
1955};
1956
1957DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1958DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1959
1960static const char* const split_mode_table[_SPLIT_MAX] = {
1961 [SPLIT_LOGIN] = "login",
1962 [SPLIT_UID] = "uid",
1963 [SPLIT_NONE] = "none",
1964};
1965
1966DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1967DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1968
1969int config_parse_line_max(
1970 const char* unit,
1971 const char *filename,
1972 unsigned line,
1973 const char *section,
1974 unsigned section_line,
1975 const char *lvalue,
1976 int ltype,
1977 const char *rvalue,
1978 void *data,
1979 void *userdata) {
1980
1981 size_t *sz = data;
1982 int r;
1983
1984 assert(filename);
1985 assert(lvalue);
1986 assert(rvalue);
1987 assert(data);
1988
1989 if (isempty(rvalue))
1990 /* Empty assignment means default */
1991 *sz = DEFAULT_LINE_MAX;
1992 else {
1993 uint64_t v;
1994
1995 r = parse_size(rvalue, 1024, &v);
1996 if (r < 0) {
1997 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
1998 return 0;
1999 }
2000
2001 if (v < 79) {
2002 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2003 * terminal size is 80ch, and it might make sense to break one character before the natural
2004 * line break would occur on that. */
2005 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2006 *sz = 79;
2007 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2008 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2009 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2010 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2011 * fail much earlier anyway. */
2012 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2013 *sz = SSIZE_MAX-1;
2014 } else
2015 *sz = (size_t) v;
2016 }
2017
2018 return 0;
2019}
1b7cf0e5
AG
2020
2021int config_parse_compress(const char* unit,
2022 const char *filename,
2023 unsigned line,
2024 const char *section,
2025 unsigned section_line,
2026 const char *lvalue,
2027 int ltype,
2028 const char *rvalue,
2029 void *data,
2030 void *userdata) {
2031 JournalCompressOptions* compress = data;
2032 int r;
2033
2034 if (streq(rvalue, "1")) {
2035 log_syntax(unit, LOG_WARNING, filename, line, 0,
2036 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2037 compress->enabled = true;
2038 } else if (streq(rvalue, "0")) {
2039 log_syntax(unit, LOG_WARNING, filename, line, 0,
2040 "Compress= ambiguously specified as 0, disabling compression");
2041 compress->enabled = false;
2042 } else if ((r = parse_boolean(rvalue)) >= 0)
2043 compress->enabled = r;
2044 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2045 compress->enabled = true;
2046 else if (isempty(rvalue)) {
2047 compress->enabled = true;
2048 compress->threshold_bytes = (uint64_t) -1;
2049 } else
2050 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2051
2052 return 0;
2053}