]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
Merge pull request #7983 from poettering/tmpfiles-eexist
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
d025f1e4
ZJS
2/***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
349cc4a5 21#if HAVE_SELINUX
24882e06
LP
22#include <selinux/selinux.h>
23#endif
8580d1f7
LP
24#include <sys/ioctl.h>
25#include <sys/mman.h>
26#include <sys/signalfd.h>
27#include <sys/statvfs.h>
07630cea 28#include <linux/sockios.h>
24882e06 29
b4bbcaa9 30#include "libudev.h"
8580d1f7 31#include "sd-daemon.h"
74df0fca
LP
32#include "sd-journal.h"
33#include "sd-messages.h"
8580d1f7
LP
34
35#include "acl-util.h"
b5efdb8a 36#include "alloc-util.h"
430f0182 37#include "audit-util.h"
d025f1e4 38#include "cgroup-util.h"
d025f1e4 39#include "conf-parser.h"
a0956174 40#include "dirent-util.h"
0dec689b 41#include "extract-word.h"
3ffd4af2 42#include "fd-util.h"
33d52ab9 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
8580d1f7 46#include "hashmap.h"
958b66ea 47#include "hostname-util.h"
4b58153d 48#include "id128-util.h"
afc5dbf3 49#include "io-util.h"
8580d1f7
LP
50#include "journal-authenticate.h"
51#include "journal-file.h"
d025f1e4
ZJS
52#include "journal-internal.h"
53#include "journal-vacuum.h"
8580d1f7 54#include "journald-audit.h"
22e3a02b 55#include "journald-context.h"
d025f1e4 56#include "journald-kmsg.h"
d025f1e4 57#include "journald-native.h"
8580d1f7 58#include "journald-rate-limit.h"
3ffd4af2 59#include "journald-server.h"
8580d1f7
LP
60#include "journald-stream.h"
61#include "journald-syslog.h"
4b58153d 62#include "log.h"
07630cea
LP
63#include "missing.h"
64#include "mkdir.h"
6bedfcbb 65#include "parse-util.h"
4e731273 66#include "proc-cmdline.h"
07630cea
LP
67#include "process-util.h"
68#include "rm-rf.h"
69#include "selinux-util.h"
70#include "signal-util.h"
71#include "socket-util.h"
32917e33 72#include "stdio-util.h"
8b43440b 73#include "string-table.h"
07630cea 74#include "string-util.h"
863a5610 75#include "syslog-util.h"
22e3a02b 76#include "user-util.h"
d025f1e4 77
d025f1e4
ZJS
78#define USER_JOURNALS_MAX 1024
79
26687bf8 80#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
81#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
82#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 83#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 84
8580d1f7 85#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 86
e22aa3d3
LP
87#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
88
7a24f3bf
VC
89/* The period to insert between posting changes for coalescing */
90#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
91
ec20fe5f
LP
92/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
93 * for a bit of additional metadata. */
94#define DEFAULT_LINE_MAX (48*1024)
95
e0ed6db9
FB
96static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
97 _cleanup_closedir_ DIR *d = NULL;
98 struct dirent *de;
99 struct statvfs ss;
e0ed6db9
FB
100
101 assert(ret_used);
102 assert(ret_free);
103
266a4700 104 d = opendir(path);
e0ed6db9
FB
105 if (!d)
106 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 107 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
108
109 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 110 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
111
112 *ret_free = ss.f_bsize * ss.f_bavail;
113 *ret_used = 0;
114 FOREACH_DIRENT_ALL(de, d, break) {
115 struct stat st;
116
117 if (!endswith(de->d_name, ".journal") &&
118 !endswith(de->d_name, ".journal~"))
119 continue;
120
121 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 122 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
123 continue;
124 }
125
126 if (!S_ISREG(st.st_mode))
127 continue;
128
129 *ret_used += (uint64_t) st.st_blocks * 512UL;
130 }
131
132 return 0;
133}
134
a0edc477 135static void cache_space_invalidate(JournalStorageSpace *space) {
67319249 136 zero(*space);
a0edc477
FB
137}
138
57f443a6 139static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 140 JournalStorageSpace *space;
266a4700 141 JournalMetrics *metrics;
23aba343 142 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 143 usec_t ts;
e0ed6db9 144 int r;
d025f1e4 145
8580d1f7 146 assert(s);
266a4700 147
266a4700 148 metrics = &storage->metrics;
23aba343 149 space = &storage->space;
d025f1e4 150
8580d1f7 151 ts = now(CLOCK_MONOTONIC);
d025f1e4 152
3099caf2 153 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
154 return 0;
155
23aba343 156 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
157 if (r < 0)
158 return r;
d025f1e4 159
23aba343
FB
160 space->vfs_used = vfs_used;
161 space->vfs_available = vfs_avail;
162
163 avail = LESS_BY(vfs_avail, metrics->keep_free);
164
23aba343
FB
165 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
166 space->available = LESS_BY(space->limit, vfs_used);
167 space->timestamp = ts;
8580d1f7
LP
168 return 1;
169}
170
3a19f215
FB
171static void patch_min_use(JournalStorage *storage) {
172 assert(storage);
173
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
179
180 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
181}
182
183
184static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 185 JournalStorage *js;
57f443a6 186 int r;
8580d1f7
LP
187
188 assert(s);
189
266a4700 190 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
191
192 r = cache_space_refresh(s, js);
193 if (r >= 0) {
194 if (available)
195 *available = js->space.available;
196 if (limit)
197 *limit = js->space.limit;
198 }
199 return r;
d025f1e4
ZJS
200}
201
cba5629e
FB
202void server_space_usage_message(Server *s, JournalStorage *storage) {
203 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
204 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
205 JournalMetrics *metrics;
cba5629e
FB
206
207 assert(s);
208
209 if (!storage)
210 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
211
57f443a6 212 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
213 return;
214
215 metrics = &storage->metrics;
23aba343 216 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
217 format_bytes(fb2, sizeof(fb2), metrics->max_use);
218 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 219 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
220 format_bytes(fb5, sizeof(fb5), storage->space.limit);
221 format_bytes(fb6, sizeof(fb6), storage->space.available);
222
13181942
LP
223 server_driver_message(s, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage->name, storage->path, fb1, fb5, fb6),
227 "JOURNAL_NAME=%s", storage->name,
228 "JOURNAL_PATH=%s", storage->path,
23aba343 229 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
230 "CURRENT_USE_PRETTY=%s", fb1,
231 "MAX_USE=%"PRIu64, metrics->max_use,
232 "MAX_USE_PRETTY=%s", fb2,
233 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 235 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
236 "DISK_AVAILABLE_PRETTY=%s", fb4,
237 "LIMIT=%"PRIu64, storage->space.limit,
238 "LIMIT_PRETTY=%s", fb5,
239 "AVAILABLE=%"PRIu64, storage->space.available,
240 "AVAILABLE_PRETTY=%s", fb6,
241 NULL);
242}
243
2fce06b0
LP
244static bool uid_for_system_journal(uid_t uid) {
245
246 /* Returns true if the specified UID shall get its data stored in the system journal*/
247
248 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
249}
250
5c3bde3f 251static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 252#if HAVE_ACL
5c3bde3f 253 int r;
d025f1e4 254#endif
d025f1e4
ZJS
255 assert(f);
256
349cc4a5 257#if HAVE_ACL
2fce06b0 258 if (uid_for_system_journal(uid))
d025f1e4
ZJS
259 return;
260
5c3bde3f
ZJS
261 r = add_acls_for_user(f->fd, uid);
262 if (r < 0)
263 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
264#endif
265}
266
7a24f3bf
VC
267static int open_journal(
268 Server *s,
269 bool reliably,
270 const char *fname,
271 int flags,
272 bool seal,
273 JournalMetrics *metrics,
7a24f3bf
VC
274 JournalFile **ret) {
275 int r;
e167d7fd 276 JournalFile *f;
7a24f3bf
VC
277
278 assert(s);
279 assert(fname);
280 assert(ret);
281
282 if (reliably)
b58c888f 283 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 284 else
5d1ce257 285 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
286 if (r < 0)
287 return r;
288
e167d7fd 289 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 290 if (r < 0) {
69a3a6fd 291 (void) journal_file_close(f);
7a24f3bf
VC
292 return r;
293 }
294
e167d7fd 295 *ret = f;
7a24f3bf
VC
296 return r;
297}
298
/* Checks for the flag file /run/systemd/journal/flushed. Any access() failure counts as "not set". */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
302
105bdb46
VC
303static int system_journal_open(Server *s, bool flush_requested) {
304 const char *fn;
305 int r = 0;
306
307 if (!s->system_journal &&
f78273c8
LP
308 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
309 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
310
311 /* If in auto mode: first try to create the machine
312 * path, but not the prefix.
313 *
314 * If in persistent mode: create /var/log/journal and
315 * the machine path */
316
317 if (s->storage == STORAGE_PERSISTENT)
318 (void) mkdir_p("/var/log/journal/", 0755);
319
266a4700 320 (void) mkdir(s->system_storage.path, 0755);
105bdb46 321
266a4700
FB
322 fn = strjoina(s->system_storage.path, "/system.journal");
323 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
324 if (r >= 0) {
325 server_add_acls(s->system_journal, 0);
57f443a6 326 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 327 patch_min_use(&s->system_storage);
105bdb46 328 } else if (r < 0) {
4c701096 329 if (!IN_SET(r, -ENOENT, -EROFS))
105bdb46
VC
330 log_warning_errno(r, "Failed to open system journal: %m");
331
332 r = 0;
333 }
929eeb54
VC
334
335 /* If the runtime journal is open, and we're post-flush, we're
336 * recovering from a failed system journal rotate (ENOSPC)
337 * for which the runtime journal was reopened.
338 *
339 * Perform an implicit flush to var, leaving the runtime
340 * journal closed, now that the system journal is back.
341 */
f78273c8
LP
342 if (!flush_requested)
343 (void) server_flush_to_var(s, true);
105bdb46
VC
344 }
345
346 if (!s->runtime_journal &&
347 (s->storage != STORAGE_NONE)) {
348
266a4700 349 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
350
351 if (s->system_journal) {
352
353 /* Try to open the runtime journal, but only
354 * if it already exists, so that we can flush
355 * it into the system journal */
356
266a4700 357 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
358 if (r < 0) {
359 if (r != -ENOENT)
360 log_warning_errno(r, "Failed to open runtime journal: %m");
361
362 r = 0;
363 }
364
365 } else {
366
367 /* OK, we really need the runtime journal, so create
368 * it if necessary. */
369
370 (void) mkdir("/run/log", 0755);
371 (void) mkdir("/run/log/journal", 0755);
372 (void) mkdir_parents(fn, 0750);
373
266a4700 374 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
375 if (r < 0)
376 return log_error_errno(r, "Failed to open runtime journal: %m");
377 }
378
379 if (s->runtime_journal) {
380 server_add_acls(s->runtime_journal, 0);
57f443a6 381 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 382 patch_min_use(&s->runtime_storage);
105bdb46
VC
383 }
384 }
385
386 return r;
387}
388
d025f1e4 389static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 390 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
391 int r;
392 JournalFile *f;
393 sd_id128_t machine;
394
395 assert(s);
396
105bdb46
VC
397 /* A rotate that fails to create the new journal (ENOSPC) leaves the
398 * rotated journal as NULL. Unless we revisit opening, even after
399 * space is made available we'll continue to return NULL indefinitely.
400 *
401 * system_journal_open() is a noop if the journals are already open, so
402 * we can just call it here to recover from failed rotates (or anything
403 * else that's left the journals as NULL).
404 *
405 * Fixes https://github.com/systemd/systemd/issues/3968 */
406 (void) system_journal_open(s, false);
407
d025f1e4
ZJS
408 /* We split up user logs only on /var, not on /run. If the
409 * runtime file is open, we write to it exclusively, in order
410 * to guarantee proper order as soon as we flush /run to
411 * /var and close the runtime file. */
412
413 if (s->runtime_journal)
414 return s->runtime_journal;
415
2fce06b0 416 if (uid_for_system_journal(uid))
d025f1e4
ZJS
417 return s->system_journal;
418
419 r = sd_id128_get_machine(&machine);
420 if (r < 0)
421 return s->system_journal;
422
4a0b58c4 423 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
424 if (f)
425 return f;
426
de0671ee
ZJS
427 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
428 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
429 return s->system_journal;
430
43cf8388 431 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 432 /* Too many open? Then let's close one */
43cf8388 433 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 434 assert(f);
69a3a6fd 435 (void) journal_file_close(f);
d025f1e4
ZJS
436 }
437
266a4700 438 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
439 if (r < 0)
440 return s->system_journal;
441
5c3bde3f 442 server_add_acls(f, uid);
d025f1e4 443
4a0b58c4 444 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 445 if (r < 0) {
69a3a6fd 446 (void) journal_file_close(f);
d025f1e4
ZJS
447 return s->system_journal;
448 }
449
450 return f;
451}
452
ea69bd41
LP
453static int do_rotate(
454 Server *s,
455 JournalFile **f,
456 const char* name,
457 bool seal,
458 uint32_t uid) {
459
fc55baee
ZJS
460 int r;
461 assert(s);
462
463 if (!*f)
464 return -EINVAL;
465
b58c888f 466 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
bb6b922f 467 if (r < 0) {
fc55baee 468 if (*f)
bb6b922f 469 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 470 else
bb6b922f
YW
471 return log_error_errno(r, "Failed to create new %s journal: %m", name);
472 }
473
474 server_add_acls(*f, uid);
2678031a 475
fc55baee
ZJS
476 return r;
477}
478
d025f1e4
ZJS
479void server_rotate(Server *s) {
480 JournalFile *f;
481 void *k;
482 Iterator i;
483 int r;
484
485 log_debug("Rotating...");
486
8580d1f7
LP
487 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
488 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 489
43cf8388 490 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 491 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 492 if (r >= 0)
43cf8388 493 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
494 else if (!f)
495 /* Old file has been closed and deallocated */
43cf8388 496 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 497 }
b58c888f
VC
498
499 /* Perform any deferred closes which aren't still offlining. */
500 SET_FOREACH(f, s->deferred_closes, i)
501 if (!journal_file_is_offlining(f)) {
502 (void) set_remove(s->deferred_closes, f);
503 (void) journal_file_close(f);
504 }
d025f1e4
ZJS
505}
506
26687bf8
OS
507void server_sync(Server *s) {
508 JournalFile *f;
26687bf8
OS
509 Iterator i;
510 int r;
511
26687bf8 512 if (s->system_journal) {
ac2e41f5 513 r = journal_file_set_offline(s->system_journal, false);
26687bf8 514 if (r < 0)
65089b82 515 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
516 }
517
65c1d46b 518 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 519 r = journal_file_set_offline(f, false);
26687bf8 520 if (r < 0)
65089b82 521 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
522 }
523
f9a810be
LP
524 if (s->sync_event_source) {
525 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
526 if (r < 0)
da927ba9 527 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 528 }
26687bf8
OS
529
530 s->sync_scheduled = false;
531}
532
3a19f215 533static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 534
63c8666b
ZJS
535 int r;
536
8580d1f7 537 assert(s);
266a4700 538 assert(storage);
8580d1f7 539
57f443a6 540 (void) cache_space_refresh(s, storage);
18e758bf
FB
541
542 if (verbose)
543 server_space_usage_message(s, storage);
8580d1f7 544
57f443a6
FB
545 r = journal_directory_vacuum(storage->path, storage->space.limit,
546 storage->metrics.n_max_files, s->max_retention_usec,
547 &s->oldest_file_usec, verbose);
63c8666b 548 if (r < 0 && r != -ENOENT)
266a4700
FB
549 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
550
a0edc477 551 cache_space_invalidate(&storage->space);
63c8666b
ZJS
552}
553
3a19f215 554int server_vacuum(Server *s, bool verbose) {
8580d1f7 555 assert(s);
d025f1e4
ZJS
556
557 log_debug("Vacuuming...");
558
559 s->oldest_file_usec = 0;
560
266a4700 561 if (s->system_journal)
3a19f215 562 do_vacuum(s, &s->system_storage, verbose);
266a4700 563 if (s->runtime_journal)
3a19f215 564 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 565
8580d1f7 566 return 0;
d025f1e4
ZJS
567}
568
0c24bb23
LP
569static void server_cache_machine_id(Server *s) {
570 sd_id128_t id;
571 int r;
572
573 assert(s);
574
575 r = sd_id128_get_machine(&id);
576 if (r < 0)
577 return;
578
579 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
580}
581
582static void server_cache_boot_id(Server *s) {
583 sd_id128_t id;
584 int r;
585
586 assert(s);
587
588 r = sd_id128_get_boot(&id);
589 if (r < 0)
590 return;
591
592 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
593}
594
595static void server_cache_hostname(Server *s) {
596 _cleanup_free_ char *t = NULL;
597 char *x;
598
599 assert(s);
600
601 t = gethostname_malloc();
602 if (!t)
603 return;
604
605 x = strappend("_HOSTNAME=", t);
606 if (!x)
607 return;
608
609 free(s->hostname_field);
610 s->hostname_field = x;
611}
612
8531ae70 613static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 614 switch(r) {
ae739cc1 615
6e1045e5
ZJS
616 case -E2BIG: /* Hit configured limit */
617 case -EFBIG: /* Hit fs limit */
618 case -EDQUOT: /* Quota limit hit */
619 case -ENOSPC: /* Disk full */
d025f1e4 620 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 621 return true;
ae739cc1 622
6e1045e5
ZJS
623 case -EIO: /* I/O error of some kind (mmap) */
624 log_warning("%s: IO error, rotating.", f->path);
625 return true;
ae739cc1 626
6e1045e5 627 case -EHOSTDOWN: /* Other machine */
d025f1e4 628 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 629 return true;
ae739cc1 630
6e1045e5 631 case -EBUSY: /* Unclean shutdown */
d025f1e4 632 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1 634
6e1045e5 635 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 636 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 637 return true;
ae739cc1 638
6e1045e5
ZJS
639 case -EBADMSG: /* Corrupted */
640 case -ENODATA: /* Truncated */
641 case -ESHUTDOWN: /* Already archived */
d025f1e4 642 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 643 return true;
ae739cc1 644
6e1045e5 645 case -EIDRM: /* Journal file has been deleted */
2678031a 646 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 647 return true;
ae739cc1
LP
648
649 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 650 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
651 return true;
652
6e1045e5 653 default:
d025f1e4 654 return false;
6e1045e5 655 }
d025f1e4
ZJS
656}
657
d07f7b9e 658static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 659 bool vacuumed = false, rotate = false;
0f972d66 660 struct dual_timestamp ts;
d025f1e4 661 JournalFile *f;
d025f1e4
ZJS
662 int r;
663
664 assert(s);
665 assert(iovec);
666 assert(n > 0);
667
0f972d66
LP
668 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
669 * the source time, and not even the time the event was originally seen, but instead simply the time we started
670 * processing it, as we want strictly linear ordering in what we write out.) */
671 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
672 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
673
7c070017
LP
674 if (ts.realtime < s->last_realtime_clock) {
675 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
676 * regular operation. However, when it does happen, then we should make sure that we start fresh files
677 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
678 * bisection works correctly. */
d025f1e4 679
7c070017
LP
680 log_debug("Time jumped backwards, rotating.");
681 rotate = true;
682 } else {
683
684 f = find_journal(s, uid);
685 if (!f)
686 return;
687
688 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
689 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
690 rotate = true;
691 }
692 }
d025f1e4 693
7c070017 694 if (rotate) {
d025f1e4 695 server_rotate(s);
3a19f215 696 server_vacuum(s, false);
d025f1e4
ZJS
697 vacuumed = true;
698
699 f = find_journal(s, uid);
700 if (!f)
701 return;
702 }
703
7c070017
LP
704 s->last_realtime_clock = ts.realtime;
705
0f972d66 706 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 707 if (r >= 0) {
d07f7b9e 708 server_schedule_sync(s, priority);
d025f1e4 709 return;
26687bf8 710 }
d025f1e4
ZJS
711
712 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
714 return;
715 }
716
717 server_rotate(s);
3a19f215 718 server_vacuum(s, false);
d025f1e4
ZJS
719
720 f = find_journal(s, uid);
721 if (!f)
722 return;
723
724 log_debug("Retrying write.");
0f972d66 725 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
726 if (r < 0)
727 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
728 else
d07f7b9e 729 server_schedule_sync(s, priority);
d025f1e4
ZJS
730}
731
22e3a02b
LP
/* Helpers for appending a "FIELD=value" string to iovec[] and advancing n, but only if the value is set. The
 * 'field' argument must be a string literal WITHOUT the trailing "=", the macros append it themselves. The
 * string buffers come from newa()/strjoina() (presumably stack allocations that stay valid until the calling
 * function returns, confirm against their definitions), hence these must not be used in loops.
 *
 * Each macro expands to a single statement, so that it is safe to use in unbraced if/else bodies. */

#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)                    \
        do {                                                                                    \
                if (isset(value)) {                                                             \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1);          \
                        sprintf(k, field "=" format, value);                                    \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (0)

#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                                          \
        do {                                                                                    \
                if (!isempty(value)) {                                                          \
                        char *k;                                                                \
                        k = strjoina(field "=", value);                                         \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (0)

#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                                           \
        do {                                                                                    \
                if (!sd_id128_is_null(value)) {                                                 \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX);                \
                        sd_id128_to_string(value, stpcpy(k, field "="));                        \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (0)

#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)                               \
        do {                                                                                    \
                if ((value_size) > 0) {                                                         \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1);                   \
                        *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0;      \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (0)
4b58153d 762
d025f1e4
ZJS
763static void dispatch_message_real(
764 Server *s,
d3070fbd 765 struct iovec *iovec, size_t n, size_t m,
22e3a02b 766 const ClientContext *c,
3b3154df 767 const struct timeval *tv,
d07f7b9e 768 int priority,
22e3a02b
LP
769 pid_t object_pid) {
770
771 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
772 uid_t journal_uid;
773 ClientContext *o;
d025f1e4
ZJS
774
775 assert(s);
776 assert(iovec);
777 assert(n > 0);
d3070fbd
LP
778 assert(n +
779 N_IOVEC_META_FIELDS +
780 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
781 client_context_extra_fields_n_iovec(c) <= m);
19cace37 782
22e3a02b
LP
783 if (c) {
784 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
785 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
786 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 787
22e3a02b
LP
788 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
789 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
790 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
791 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 792
22e3a02b 793 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 794
22e3a02b
LP
795 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
796 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 797
22e3a02b
LP
798 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
799 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
800 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
801 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
802 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
803 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
804 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 805
22e3a02b 806 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d3070fbd
LP
807
808 if (c->extra_fields_n_iovec > 0) {
809 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
810 n += c->extra_fields_n_iovec;
811 }
d025f1e4 812 }
968f3196 813
22e3a02b 814 assert(n <= m);
968f3196 815
22e3a02b 816 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 817
22e3a02b
LP
818 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
819 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
820 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 821
22e3a02b
LP
822 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
823 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
824 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
825 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 826
22e3a02b 827 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 828
22e3a02b
LP
829 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
830 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 831
22e3a02b
LP
832 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
833 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
834 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
835 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
836 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
837 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
838 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 839
22e3a02b 840 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 841 }
22e3a02b 842
968f3196 843 assert(n <= m);
d025f1e4
ZJS
844
845 if (tv) {
398a50cd 846 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 847 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
848 }
849
850 /* Note that strictly speaking storing the boot id here is
851 * redundant since the entry includes this in-line
852 * anyway. However, we need this indexed, too. */
0c24bb23 853 if (!isempty(s->boot_id_field))
e6a7ec4b 854 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 855
0c24bb23 856 if (!isempty(s->machine_id_field))
e6a7ec4b 857 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 858
0c24bb23 859 if (!isempty(s->hostname_field))
e6a7ec4b 860 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
861
862 assert(n <= m);
863
22e3a02b
LP
864 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
865 /* Split up strictly by (non-root) UID */
866 journal_uid = c->uid;
867 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
868 /* Split up by login UIDs. We do this only if the
869 * realuid is not root, in order not to accidentally
870 * leak privileged information to the user that is
871 * logged by a privileged process that is part of an
7517e174 872 * unprivileged session. */
22e3a02b 873 journal_uid = c->owner_uid;
da499392
KS
874 else
875 journal_uid = 0;
759c945a 876
d07f7b9e 877 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
878}
879
13181942 880void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
22e3a02b 881
d3070fbd
LP
882 struct iovec *iovec;
883 size_t n = 0, k, m;
d025f1e4 884 va_list ap;
22e3a02b 885 int r;
d025f1e4
ZJS
886
887 assert(s);
888 assert(format);
889
f643ae71 890 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
d3070fbd
LP
891 iovec = newa(struct iovec, m);
892
4850d39a 893 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
894 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
895 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 896
e6a7ec4b 897 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 898 assert_cc(6 == LOG_INFO);
e6a7ec4b 899 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 900
2b044526 901 if (message_id)
e6a7ec4b 902 iovec[n++] = IOVEC_MAKE_STRING(message_id);
d3070fbd 903 k = n;
8a03c9ef
ZJS
904
905 va_start(ap, format);
d3070fbd 906 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
32917e33 907 /* Error handling below */
8a03c9ef
ZJS
908 va_end(ap);
909
32917e33 910 if (r >= 0)
d3070fbd 911 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
8a03c9ef 912
d3070fbd
LP
913 while (k < n)
914 free(iovec[k++].iov_base);
32917e33
ZJS
915
916 if (r < 0) {
917 /* We failed to format the message. Emit a warning instead. */
918 char buf[LINE_MAX];
919
920 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
921
922 n = 3;
e6a7ec4b
LP
923 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
924 iovec[n++] = IOVEC_MAKE_STRING(buf);
d3070fbd 925 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
32917e33 926 }
d025f1e4
ZJS
927}
928
929void server_dispatch_message(
930 Server *s,
d3070fbd 931 struct iovec *iovec, size_t n, size_t m,
22e3a02b 932 ClientContext *c,
3b3154df 933 const struct timeval *tv,
968f3196
ZJS
934 int priority,
935 pid_t object_pid) {
d025f1e4 936
8580d1f7 937 uint64_t available = 0;
22e3a02b 938 int rl;
d025f1e4
ZJS
939
940 assert(s);
941 assert(iovec || n == 0);
942
943 if (n == 0)
944 return;
945
946 if (LOG_PRI(priority) > s->max_level_store)
947 return;
948
2f5df74a
HHPF
949 /* Stop early in case the information will not be stored
950 * in a journal. */
951 if (s->storage == STORAGE_NONE)
952 return;
953
22e3a02b
LP
954 if (c && c->unit) {
955 (void) determine_space(s, &available, NULL);
d025f1e4 956
22e3a02b
LP
957 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
958 if (rl == 0)
959 return;
d025f1e4 960
22e3a02b
LP
961 /* Write a suppression message if we suppressed something */
962 if (rl > 1)
13181942
LP
963 server_driver_message(s, c->pid,
964 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
965 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
5908ff1c 966 "N_DROPPED=%i", rl - 1,
22e3a02b 967 NULL);
d025f1e4
ZJS
968 }
969
22e3a02b 970 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
971}
972
f78273c8 973int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
974 sd_id128_t machine;
975 sd_journal *j = NULL;
fbb63411
LP
976 char ts[FORMAT_TIMESPAN_MAX];
977 usec_t start;
978 unsigned n = 0;
979 int r;
d025f1e4
ZJS
980
981 assert(s);
982
f78273c8 983 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
984 return 0;
985
986 if (!s->runtime_journal)
987 return 0;
988
f78273c8
LP
989 if (require_flag_file && !flushed_flag_is_set())
990 return 0;
991
8580d1f7 992 (void) system_journal_open(s, true);
d025f1e4
ZJS
993
994 if (!s->system_journal)
995 return 0;
996
997 log_debug("Flushing to /var...");
998
fbb63411
LP
999 start = now(CLOCK_MONOTONIC);
1000
d025f1e4 1001 r = sd_id128_get_machine(&machine);
00a16861 1002 if (r < 0)
d025f1e4 1003 return r;
d025f1e4
ZJS
1004
1005 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1006 if (r < 0)
1007 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1008
93b73b06
LP
1009 sd_journal_set_data_threshold(j, 0);
1010
d025f1e4
ZJS
1011 SD_JOURNAL_FOREACH(j) {
1012 Object *o = NULL;
1013 JournalFile *f;
1014
1015 f = j->current_file;
1016 assert(f && f->current_offset > 0);
1017
fbb63411
LP
1018 n++;
1019
d025f1e4
ZJS
1020 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1021 if (r < 0) {
da927ba9 1022 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1023 goto finish;
1024 }
1025
1026 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1027 if (r >= 0)
1028 continue;
1029
1030 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1031 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1032 goto finish;
1033 }
1034
1035 server_rotate(s);
3a19f215 1036 server_vacuum(s, false);
d025f1e4 1037
253f59df
LP
1038 if (!s->system_journal) {
1039 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1040 r = -EIO;
1041 goto finish;
1042 }
1043
d025f1e4
ZJS
1044 log_debug("Retrying write.");
1045 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1046 if (r < 0) {
da927ba9 1047 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1048 goto finish;
1049 }
1050 }
1051
804ae586
LP
1052 r = 0;
1053
d025f1e4
ZJS
1054finish:
1055 journal_file_post_change(s->system_journal);
1056
804ae586 1057 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1058
1059 if (r >= 0)
c6878637 1060 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1061
763c7aa2 1062 sd_journal_close(j);
d025f1e4 1063
13181942 1064 server_driver_message(s, 0, NULL,
8a03c9ef
ZJS
1065 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1066 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1067 n),
1068 NULL);
fbb63411 1069
d025f1e4
ZJS
1070 return r;
1071}
1072
8531ae70 1073int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1074 Server *s = userdata;
a315ac4e
LP
1075 struct ucred *ucred = NULL;
1076 struct timeval *tv = NULL;
1077 struct cmsghdr *cmsg;
1078 char *label = NULL;
1079 size_t label_len = 0, m;
1080 struct iovec iovec;
1081 ssize_t n;
1082 int *fds = NULL, v = 0;
1083 unsigned n_fds = 0;
1084
1085 union {
1086 struct cmsghdr cmsghdr;
1087
1088 /* We use NAME_MAX space for the SELinux label
1089 * here. The kernel currently enforces no
1090 * limit, but according to suggestions from
1091 * the SELinux people this will change and it
1092 * will probably be identical to NAME_MAX. For
1093 * now we use that, but this should be updated
1094 * one day when the final limit is known. */
1095 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1096 CMSG_SPACE(sizeof(struct timeval)) +
1097 CMSG_SPACE(sizeof(int)) + /* fd */
1098 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1099 } control = {};
1100
1101 union sockaddr_union sa = {};
1102
1103 struct msghdr msghdr = {
1104 .msg_iov = &iovec,
1105 .msg_iovlen = 1,
1106 .msg_control = &control,
1107 .msg_controllen = sizeof(control),
1108 .msg_name = &sa,
1109 .msg_namelen = sizeof(sa),
1110 };
f9a810be 1111
d025f1e4 1112 assert(s);
875c2e22 1113 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1114
1115 if (revents != EPOLLIN) {
1116 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1117 return -EIO;
1118 }
1119
22e3a02b
LP
1120 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1121 * it.) */
a315ac4e 1122 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1123
a315ac4e
LP
1124 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1125 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1126 (size_t) LINE_MAX,
1127 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1128
a315ac4e
LP
1129 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1130 return log_oom();
875c2e22 1131
a315ac4e
LP
1132 iovec.iov_base = s->buffer;
1133 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1134
a315ac4e
LP
1135 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1136 if (n < 0) {
3742095b 1137 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1138 return 0;
875c2e22 1139
a315ac4e
LP
1140 return log_error_errno(errno, "recvmsg() failed: %m");
1141 }
875c2e22 1142
a315ac4e
LP
1143 CMSG_FOREACH(cmsg, &msghdr) {
1144
1145 if (cmsg->cmsg_level == SOL_SOCKET &&
1146 cmsg->cmsg_type == SCM_CREDENTIALS &&
1147 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1148 ucred = (struct ucred*) CMSG_DATA(cmsg);
1149 else if (cmsg->cmsg_level == SOL_SOCKET &&
1150 cmsg->cmsg_type == SCM_SECURITY) {
1151 label = (char*) CMSG_DATA(cmsg);
1152 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1153 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1154 cmsg->cmsg_type == SO_TIMESTAMP &&
1155 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1156 tv = (struct timeval*) CMSG_DATA(cmsg);
1157 else if (cmsg->cmsg_level == SOL_SOCKET &&
1158 cmsg->cmsg_type == SCM_RIGHTS) {
1159 fds = (int*) CMSG_DATA(cmsg);
1160 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1161 }
a315ac4e 1162 }
d025f1e4 1163
a315ac4e
LP
1164 /* And a trailing NUL, just in case */
1165 s->buffer[n] = 0;
1166
1167 if (fd == s->syslog_fd) {
1168 if (n > 0 && n_fds == 0)
1169 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1170 else if (n_fds > 0)
1171 log_warning("Got file descriptors via syslog socket. Ignoring.");
1172
1173 } else if (fd == s->native_fd) {
1174 if (n > 0 && n_fds == 0)
1175 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1176 else if (n == 0 && n_fds == 1)
1177 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1178 else if (n_fds > 0)
1179 log_warning("Got too many file descriptors via native socket. Ignoring.");
1180
1181 } else {
1182 assert(fd == s->audit_fd);
1183
1184 if (n > 0 && n_fds == 0)
1185 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1186 else if (n_fds > 0)
1187 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1188 }
a315ac4e
LP
1189
1190 close_many(fds, n_fds);
1191 return 0;
f9a810be 1192}
d025f1e4 1193
f9a810be
LP
1194static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1195 Server *s = userdata;
33d52ab9 1196 int r;
d025f1e4 1197
f9a810be 1198 assert(s);
d025f1e4 1199
94b65516 1200 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1201
f78273c8 1202 (void) server_flush_to_var(s, false);
f9a810be 1203 server_sync(s);
3a19f215 1204 server_vacuum(s, false);
d025f1e4 1205
33d52ab9
LP
1206 r = touch("/run/systemd/journal/flushed");
1207 if (r < 0)
1208 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1209
18e758bf 1210 server_space_usage_message(s, NULL);
f9a810be
LP
1211 return 0;
1212}
d025f1e4 1213
f9a810be
LP
1214static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215 Server *s = userdata;
33d52ab9 1216 int r;
d025f1e4 1217
f9a810be 1218 assert(s);
d025f1e4 1219
94b65516 1220 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1221 server_rotate(s);
3a19f215
FB
1222 server_vacuum(s, true);
1223
1224 if (s->system_journal)
1225 patch_min_use(&s->system_storage);
1226 if (s->runtime_journal)
1227 patch_min_use(&s->runtime_storage);
d025f1e4 1228
dbd6e31c 1229 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1230 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1231 if (r < 0)
1232 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1233
f9a810be
LP
1234 return 0;
1235}
d025f1e4 1236
f9a810be
LP
1237static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238 Server *s = userdata;
d025f1e4 1239
f9a810be 1240 assert(s);
d025f1e4 1241
4daf54a8 1242 log_received_signal(LOG_INFO, si);
d025f1e4 1243
6203e07a 1244 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1245 return 0;
1246}
1247
94b65516
LP
1248static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249 Server *s = userdata;
33d52ab9 1250 int r;
94b65516
LP
1251
1252 assert(s);
1253
1254 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1255
1256 server_sync(s);
1257
1258 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1259 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1260 if (r < 0)
1261 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1262
1263 return 0;
1264}
1265
f9a810be 1266static int setup_signals(Server *s) {
f9a810be 1267 int r;
d025f1e4
ZJS
1268
1269 assert(s);
1270
9bab3b65 1271 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1272
151b9b96 1273 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1274 if (r < 0)
1275 return r;
1276
151b9b96 1277 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1278 if (r < 0)
1279 return r;
d025f1e4 1280
151b9b96 1281 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1282 if (r < 0)
1283 return r;
d025f1e4 1284
b374689c
LP
1285 /* Let's process SIGTERM late, so that we flush all queued
1286 * messages to disk before we exit */
1287 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1288 if (r < 0)
1289 return r;
1290
1291 /* When journald is invoked on the terminal (when debugging),
1292 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1293 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1294 if (r < 0)
1295 return r;
d025f1e4 1296
b374689c
LP
1297 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1298 if (r < 0)
1299 return r;
1300
94b65516
LP
1301 /* SIGRTMIN+1 causes an immediate sync. We process this very
1302 * late, so that everything else queued at this point is
1303 * really written to disk. Clients can watch
1304 * /run/systemd/journal/synced with inotify until its mtime
1305 * changes to see when a sync happened. */
1306 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1307 if (r < 0)
1308 return r;
1309
1310 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1311 if (r < 0)
1312 return r;
1313
d025f1e4
ZJS
1314 return 0;
1315}
1316
5707ecf3
ZJS
1317static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1318 Server *s = data;
74df0fca 1319 int r;
d025f1e4 1320
5707ecf3 1321 assert(s);
d025f1e4 1322
1d84ad94
LP
1323 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1324
5707ecf3 1325 r = value ? parse_boolean(value) : true;
d581d9d9 1326 if (r < 0)
5707ecf3
ZJS
1327 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1328 else
1329 s->forward_to_syslog = r;
1d84ad94
LP
1330
1331 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1332
5707ecf3
ZJS
1333 r = value ? parse_boolean(value) : true;
1334 if (r < 0)
1335 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1336 else
1337 s->forward_to_kmsg = r;
1d84ad94
LP
1338
1339 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1340
5707ecf3
ZJS
1341 r = value ? parse_boolean(value) : true;
1342 if (r < 0)
1343 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1344 else
1345 s->forward_to_console = r;
1d84ad94
LP
1346
1347 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1348
5707ecf3
ZJS
1349 r = value ? parse_boolean(value) : true;
1350 if (r < 0)
1351 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1352 else
1353 s->forward_to_wall = r;
1d84ad94
LP
1354
1355 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1356
1357 if (proc_cmdline_value_missing(key, value))
1358 return 0;
1359
5707ecf3
ZJS
1360 r = log_level_from_string(value);
1361 if (r < 0)
1362 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1363 else
1364 s->max_level_console = r;
1d84ad94
LP
1365
1366 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1367
1368 if (proc_cmdline_value_missing(key, value))
1369 return 0;
1370
5707ecf3
ZJS
1371 r = log_level_from_string(value);
1372 if (r < 0)
1373 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1374 else
1375 s->max_level_store = r;
1d84ad94
LP
1376
1377 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1378
1379 if (proc_cmdline_value_missing(key, value))
1380 return 0;
1381
5707ecf3
ZJS
1382 r = log_level_from_string(value);
1383 if (r < 0)
1384 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1385 else
1386 s->max_level_syslog = r;
1d84ad94
LP
1387
1388 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1389
1390 if (proc_cmdline_value_missing(key, value))
1391 return 0;
1392
5707ecf3
ZJS
1393 r = log_level_from_string(value);
1394 if (r < 0)
1395 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1396 else
1397 s->max_level_kmsg = r;
1d84ad94
LP
1398
1399 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1400
1401 if (proc_cmdline_value_missing(key, value))
1402 return 0;
1403
5707ecf3
ZJS
1404 r = log_level_from_string(value);
1405 if (r < 0)
1406 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1407 else
1408 s->max_level_wall = r;
1d84ad94 1409
5707ecf3
ZJS
1410 } else if (startswith(key, "systemd.journald"))
1411 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1412
804ae586 1413 /* do not warn about state here, since probably systemd already did */
db91ea32 1414 return 0;
d025f1e4
ZJS
1415}
1416
1417static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1418 assert(s);
1419
43688c49 1420 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1421 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1422 "Journal\0",
1423 config_item_perf_lookup, journald_gperf_lookup,
bcde742e 1424 CONFIG_PARSE_WARN, s);
d025f1e4
ZJS
1425}
1426
f9a810be
LP
1427static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1428 Server *s = userdata;
26687bf8
OS
1429
1430 assert(s);
1431
f9a810be 1432 server_sync(s);
26687bf8
OS
1433 return 0;
1434}
1435
d07f7b9e 1436int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1437 int r;
1438
26687bf8
OS
1439 assert(s);
1440
d07f7b9e
LP
1441 if (priority <= LOG_CRIT) {
1442 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1443 server_sync(s);
1444 return 0;
1445 }
1446
26687bf8
OS
1447 if (s->sync_scheduled)
1448 return 0;
1449
f9a810be
LP
1450 if (s->sync_interval_usec > 0) {
1451 usec_t when;
ca267016 1452
6a0f1f6d 1453 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1454 if (r < 0)
1455 return r;
26687bf8 1456
f9a810be
LP
1457 when += s->sync_interval_usec;
1458
1459 if (!s->sync_event_source) {
6a0f1f6d
LP
1460 r = sd_event_add_time(
1461 s->event,
1462 &s->sync_event_source,
1463 CLOCK_MONOTONIC,
1464 when, 0,
1465 server_dispatch_sync, s);
f9a810be
LP
1466 if (r < 0)
1467 return r;
1468
1469 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1470 } else {
1471 r = sd_event_source_set_time(s->sync_event_source, when);
1472 if (r < 0)
1473 return r;
1474
1475 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1476 }
26687bf8 1477 if (r < 0)
f9a810be 1478 return r;
26687bf8 1479
f9a810be
LP
1480 s->sync_scheduled = true;
1481 }
26687bf8
OS
1482
1483 return 0;
1484}
1485
0c24bb23
LP
1486static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1487 Server *s = userdata;
1488
1489 assert(s);
1490
1491 server_cache_hostname(s);
1492 return 0;
1493}
1494
1495static int server_open_hostname(Server *s) {
1496 int r;
1497
1498 assert(s);
1499
db4a47e9
LP
1500 s->hostname_fd = open("/proc/sys/kernel/hostname",
1501 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
4a62c710
MS
1502 if (s->hostname_fd < 0)
1503 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1504
151b9b96 1505 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1506 if (r < 0) {
28def94c
DR
1507 /* kernels prior to 3.2 don't support polling this file. Ignore
1508 * the failure. */
1509 if (r == -EPERM) {
e53fc357 1510 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1511 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1512 return 0;
1513 }
1514
23bbb0de 1515 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1516 }
1517
1518 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1519 if (r < 0)
1520 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1521
1522 return 0;
1523}
1524
e22aa3d3
LP
1525static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1526 Server *s = userdata;
1527 int r;
1528
1529 assert(s);
1530 assert(s->notify_event_source == es);
1531 assert(s->notify_fd == fd);
1532
e22aa3d3 1533 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1534 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1535 * READY=1 event or an stdout stream event. If there's nothing
1536 * to write anymore, turn our event source off. The next time
1537 * there's something to send it will be turned on again. */
e22aa3d3
LP
1538
1539 if (!s->sent_notify_ready) {
1540 static const char p[] =
1541 "READY=1\n"
1542 "STATUS=Processing requests...";
1543 ssize_t l;
1544
1545 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1546 if (l < 0) {
1547 if (errno == EAGAIN)
1548 return 0;
1549
1550 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1551 }
1552
1553 s->sent_notify_ready = true;
1554 log_debug("Sent READY=1 notification.");
1555
119e9655
LP
1556 } else if (s->send_watchdog) {
1557
1558 static const char p[] =
1559 "WATCHDOG=1";
1560
1561 ssize_t l;
1562
1563 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1564 if (l < 0) {
1565 if (errno == EAGAIN)
1566 return 0;
1567
1568 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1569 }
1570
1571 s->send_watchdog = false;
1572 log_debug("Sent WATCHDOG=1 notification.");
1573
e22aa3d3
LP
1574 } else if (s->stdout_streams_notify_queue)
1575 /* Dispatch one stream notification event */
1576 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1577
61233823 1578 /* Leave us enabled if there's still more to do. */
119e9655 1579 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1580 return 0;
1581
1582 /* There was nothing to do anymore, let's turn ourselves off. */
1583 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1584 if (r < 0)
1585 return log_error_errno(r, "Failed to turn off notify event source: %m");
1586
1587 return 0;
1588}
1589
119e9655
LP
1590static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1591 Server *s = userdata;
1592 int r;
1593
1594 assert(s);
1595
1596 s->send_watchdog = true;
1597
1598 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1599 if (r < 0)
1600 log_warning_errno(r, "Failed to turn on notify event source: %m");
1601
1602 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1603 if (r < 0)
1604 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1605
1606 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1607 if (r < 0)
1608 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1609
1610 return 0;
1611}
1612
e22aa3d3
LP
1613static int server_connect_notify(Server *s) {
1614 union sockaddr_union sa = {
1615 .un.sun_family = AF_UNIX,
1616 };
1617 const char *e;
1618 int r;
1619
1620 assert(s);
1621 assert(s->notify_fd < 0);
1622 assert(!s->notify_event_source);
1623
1624 /*
1625 So here's the problem: we'd like to send notification
1626 messages to PID 1, but we cannot do that via sd_notify(),
1627 since that's synchronous, and we might end up blocking on
1628 it. Specifically: given that PID 1 might block on
1629 dbus-daemon during IPC, and dbus-daemon is logging to us,
1630 and might hence block on us, we might end up in a deadlock
ccddd104 1631 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1632 generating a full blocking circle. To avoid this, let's
1633 create a non-blocking socket, and connect it to the
1634 notification socket, and then wait for POLLOUT before we
1635 send anything. This should efficiently avoid any deadlocks,
1636 as we'll never block on PID 1, hence PID 1 can safely block
1637 on dbus-daemon which can safely block on us again.
1638
1639 Don't think that this issue is real? It is, see:
1640 https://github.com/systemd/systemd/issues/1505
1641 */
1642
1643 e = getenv("NOTIFY_SOCKET");
1644 if (!e)
1645 return 0;
1646
4c701096 1647 if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
e22aa3d3
LP
1648 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1649 return -EINVAL;
1650 }
1651
1652 if (strlen(e) > sizeof(sa.un.sun_path)) {
1653 log_error("NOTIFY_SOCKET path too long: %s", e);
1654 return -EINVAL;
1655 }
1656
1657 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1658 if (s->notify_fd < 0)
1659 return log_error_errno(errno, "Failed to create notify socket: %m");
1660
1661 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1662
1663 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1664 if (sa.un.sun_path[0] == '@')
1665 sa.un.sun_path[0] = 0;
1666
fc2fffe7 1667 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1668 if (r < 0)
1669 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1670
1671 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1672 if (r < 0)
1673 return log_error_errno(r, "Failed to watch notification socket: %m");
1674
119e9655
LP
1675 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1676 s->send_watchdog = true;
1677
4de2402b 1678 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1679 if (r < 0)
1680 return log_error_errno(r, "Failed to add watchdog time event: %m");
1681 }
1682
e22aa3d3
LP
1683 /* This should fire pretty soon, which we'll use to send the
1684 * READY=1 event. */
1685
1686 return 0;
1687}
1688
d025f1e4 1689int server_init(Server *s) {
13790add 1690 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1691 int n, r, fd;
7d18d348 1692 bool no_sockets;
d025f1e4
ZJS
1693
1694 assert(s);
1695
1696 zero(*s);
e22aa3d3 1697 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1698 s->compress = true;
1699 s->seal = true;
b2392ff3 1700 s->read_kmsg = true;
d025f1e4 1701
119e9655
LP
1702 s->watchdog_usec = USEC_INFINITY;
1703
26687bf8
OS
1704 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1705 s->sync_scheduled = false;
1706
d025f1e4
ZJS
1707 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1708 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1709
40b71e89 1710 s->forward_to_wall = true;
d025f1e4 1711
e150e820
MB
1712 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1713
d025f1e4
ZJS
1714 s->max_level_store = LOG_DEBUG;
1715 s->max_level_syslog = LOG_DEBUG;
1716 s->max_level_kmsg = LOG_NOTICE;
1717 s->max_level_console = LOG_INFO;
40b71e89 1718 s->max_level_wall = LOG_EMERG;
d025f1e4 1719
ec20fe5f
LP
1720 s->line_max = DEFAULT_LINE_MAX;
1721
266a4700
FB
1722 journal_reset_metrics(&s->system_storage.metrics);
1723 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1724
1725 server_parse_config_file(s);
1d84ad94
LP
1726
1727 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1728 if (r < 0)
1729 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1730
d288f79f 1731 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1732 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1733 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1734 s->rate_limit_interval = s->rate_limit_burst = 0;
1735 }
d025f1e4 1736
8580d1f7 1737 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1738
43cf8388 1739 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1740 if (!s->user_journals)
1741 return log_oom();
1742
1743 s->mmap = mmap_cache_new();
1744 if (!s->mmap)
1745 return log_oom();
1746
b58c888f
VC
1747 s->deferred_closes = set_new(NULL);
1748 if (!s->deferred_closes)
1749 return log_oom();
1750
f9a810be 1751 r = sd_event_default(&s->event);
23bbb0de
MS
1752 if (r < 0)
1753 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1754
1755 n = sd_listen_fds(true);
23bbb0de
MS
1756 if (n < 0)
1757 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1758
1759 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1760
1761 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1762
1763 if (s->native_fd >= 0) {
1764 log_error("Too many native sockets passed.");
1765 return -EINVAL;
1766 }
1767
1768 s->native_fd = fd;
1769
1770 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1771
1772 if (s->stdout_fd >= 0) {
1773 log_error("Too many stdout sockets passed.");
1774 return -EINVAL;
1775 }
1776
1777 s->stdout_fd = fd;
1778
03ee5c38
LP
1779 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1780 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1781
1782 if (s->syslog_fd >= 0) {
1783 log_error("Too many /dev/log sockets passed.");
1784 return -EINVAL;
1785 }
1786
1787 s->syslog_fd = fd;
1788
875c2e22
LP
1789 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1790
1791 if (s->audit_fd >= 0) {
1792 log_error("Too many audit sockets passed.");
1793 return -EINVAL;
1794 }
1795
1796 s->audit_fd = fd;
1797
4ec3cd73 1798 } else {
4ec3cd73 1799
13790add
LP
1800 if (!fds) {
1801 fds = fdset_new();
1802 if (!fds)
1803 return log_oom();
1804 }
4ec3cd73 1805
13790add
LP
1806 r = fdset_put(fds, fd);
1807 if (r < 0)
1808 return log_oom();
4ec3cd73 1809 }
d025f1e4
ZJS
1810 }
1811
15d91bff
ZJS
1812 /* Try to restore streams, but don't bother if this fails */
1813 (void) server_restore_streams(s, fds);
d025f1e4 1814
13790add
LP
1815 if (fdset_size(fds) > 0) {
1816 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1817 fds = fdset_free(fds);
1818 }
1819
7d18d348
ZJS
1820 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1821
1822 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1823
1824 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1825 r = server_open_stdout_socket(s);
1826 if (r < 0)
1827 return r;
1828
37b7affe 1829 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1830 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1831 if (r < 0)
1832 return r;
1833
37b7affe 1834 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1835 r = server_open_native_socket(s);
d025f1e4
ZJS
1836 if (r < 0)
1837 return r;
1838
b2392ff3 1839 /* /dev/kmsg */
d025f1e4
ZJS
1840 r = server_open_dev_kmsg(s);
1841 if (r < 0)
1842 return r;
1843
7d18d348
ZJS
1844 /* Unless we got *some* sockets and not audit, open audit socket */
1845 if (s->audit_fd >= 0 || no_sockets) {
1846 r = server_open_audit(s);
1847 if (r < 0)
1848 return r;
1849 }
875c2e22 1850
d025f1e4
ZJS
1851 r = server_open_kernel_seqnum(s);
1852 if (r < 0)
1853 return r;
1854
0c24bb23
LP
1855 r = server_open_hostname(s);
1856 if (r < 0)
1857 return r;
1858
f9a810be 1859 r = setup_signals(s);
d025f1e4
ZJS
1860 if (r < 0)
1861 return r;
1862
1863 s->udev = udev_new();
1864 if (!s->udev)
1865 return -ENOMEM;
1866
f9a810be 1867 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1868 if (!s->rate_limit)
1869 return -ENOMEM;
1870
e9174f29
LP
1871 r = cg_get_root_path(&s->cgroup_root);
1872 if (r < 0)
1873 return r;
1874
0c24bb23
LP
1875 server_cache_hostname(s);
1876 server_cache_boot_id(s);
1877 server_cache_machine_id(s);
1878
266a4700
FB
1879 s->runtime_storage.name = "Runtime journal";
1880 s->system_storage.name = "System journal";
1881
605405c6
ZJS
1882 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1883 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1884 if (!s->runtime_storage.path || !s->system_storage.path)
1885 return -ENOMEM;
1886
e22aa3d3
LP
1887 (void) server_connect_notify(s);
1888
22e3a02b
LP
1889 (void) client_context_acquire_default(s);
1890
804ae586 1891 return system_journal_open(s, false);
d025f1e4
ZJS
1892}
1893
1894void server_maybe_append_tags(Server *s) {
349cc4a5 1895#if HAVE_GCRYPT
d025f1e4
ZJS
1896 JournalFile *f;
1897 Iterator i;
1898 usec_t n;
1899
1900 n = now(CLOCK_REALTIME);
1901
1902 if (s->system_journal)
1903 journal_file_maybe_append_tag(s->system_journal, n);
1904
43cf8388 1905 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1906 journal_file_maybe_append_tag(f, n);
1907#endif
1908}
1909
1910void server_done(Server *s) {
d025f1e4
ZJS
1911 assert(s);
1912
f9168190 1913 set_free_with_destructor(s->deferred_closes, journal_file_close);
b58c888f 1914
d025f1e4
ZJS
1915 while (s->stdout_streams)
1916 stdout_stream_free(s->stdout_streams);
1917
22e3a02b
LP
1918 client_context_flush_all(s);
1919
d025f1e4 1920 if (s->system_journal)
69a3a6fd 1921 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1922
1923 if (s->runtime_journal)
69a3a6fd 1924 (void) journal_file_close(s->runtime_journal);
d025f1e4 1925
f9168190 1926 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
d025f1e4 1927
f9a810be
LP
1928 sd_event_source_unref(s->syslog_event_source);
1929 sd_event_source_unref(s->native_event_source);
1930 sd_event_source_unref(s->stdout_event_source);
1931 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1932 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1933 sd_event_source_unref(s->sync_event_source);
1934 sd_event_source_unref(s->sigusr1_event_source);
1935 sd_event_source_unref(s->sigusr2_event_source);
1936 sd_event_source_unref(s->sigterm_event_source);
1937 sd_event_source_unref(s->sigint_event_source);
94b65516 1938 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1939 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1940 sd_event_source_unref(s->notify_event_source);
119e9655 1941 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1942 sd_event_unref(s->event);
d025f1e4 1943
03e334a1
LP
1944 safe_close(s->syslog_fd);
1945 safe_close(s->native_fd);
1946 safe_close(s->stdout_fd);
1947 safe_close(s->dev_kmsg_fd);
875c2e22 1948 safe_close(s->audit_fd);
03e334a1 1949 safe_close(s->hostname_fd);
e22aa3d3 1950 safe_close(s->notify_fd);
0c24bb23 1951
d025f1e4
ZJS
1952 if (s->rate_limit)
1953 journal_rate_limit_free(s->rate_limit);
1954
1955 if (s->kernel_seqnum)
1956 munmap(s->kernel_seqnum, sizeof(uint64_t));
1957
1958 free(s->buffer);
1959 free(s->tty_path);
e9174f29 1960 free(s->cgroup_root);
99d0966e 1961 free(s->hostname_field);
c6e9e16f
ZJS
1962 free(s->runtime_storage.path);
1963 free(s->system_storage.path);
d025f1e4
ZJS
1964
1965 if (s->mmap)
1966 mmap_cache_unref(s->mmap);
1967
3e044c49 1968 udev_unref(s->udev);
d025f1e4 1969}
8580d1f7
LP
1970
1971static const char* const storage_table[_STORAGE_MAX] = {
1972 [STORAGE_AUTO] = "auto",
1973 [STORAGE_VOLATILE] = "volatile",
1974 [STORAGE_PERSISTENT] = "persistent",
1975 [STORAGE_NONE] = "none"
1976};
1977
1978DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1979DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1980
1981static const char* const split_mode_table[_SPLIT_MAX] = {
1982 [SPLIT_LOGIN] = "login",
1983 [SPLIT_UID] = "uid",
1984 [SPLIT_NONE] = "none",
1985};
1986
1987DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1988DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1989
1990int config_parse_line_max(
1991 const char* unit,
1992 const char *filename,
1993 unsigned line,
1994 const char *section,
1995 unsigned section_line,
1996 const char *lvalue,
1997 int ltype,
1998 const char *rvalue,
1999 void *data,
2000 void *userdata) {
2001
2002 size_t *sz = data;
2003 int r;
2004
2005 assert(filename);
2006 assert(lvalue);
2007 assert(rvalue);
2008 assert(data);
2009
2010 if (isempty(rvalue))
2011 /* Empty assignment means default */
2012 *sz = DEFAULT_LINE_MAX;
2013 else {
2014 uint64_t v;
2015
2016 r = parse_size(rvalue, 1024, &v);
2017 if (r < 0) {
2018 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2019 return 0;
2020 }
2021
2022 if (v < 79) {
2023 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2024 * terminal size is 80ch, and it might make sense to break one character before the natural
2025 * line break would occur on that. */
2026 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2027 *sz = 79;
2028 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2029 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2030 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2031 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2032 * fail much earlier anyway. */
2033 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2034 *sz = SSIZE_MAX-1;
2035 } else
2036 *sz = (size_t) v;
2037 }
2038
2039 return 0;
2040}