]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
util-lib: various improvements to kernel command line parsing
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
863a5610 74#include "syslog-util.h"
d025f1e4 75
d025f1e4
ZJS
/* Once this many per-user journal files are open, find_journal() closes the oldest before opening another */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* Cached disk space figures younger than this are reused by cache_space_refresh() instead of being re-measured */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
e0ed6db9
FB
90static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
e0ed6db9
FB
94
95 assert(ret_used);
96 assert(ret_free);
97
266a4700 98 d = opendir(path);
e0ed6db9
FB
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 101 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127}
128
a0edc477
FB
129static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131}
132
57f443a6 133static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 134 JournalStorageSpace *space;
266a4700 135 JournalMetrics *metrics;
23aba343 136 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 137 usec_t ts;
e0ed6db9 138 int r;
d025f1e4 139
8580d1f7 140 assert(s);
266a4700 141
266a4700 142 metrics = &storage->metrics;
23aba343 143 space = &storage->space;
d025f1e4 144
8580d1f7 145 ts = now(CLOCK_MONOTONIC);
d025f1e4 146
3099caf2 147 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
148 return 0;
149
23aba343 150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
151 if (r < 0)
152 return r;
d025f1e4 153
23aba343
FB
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
23aba343
FB
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
8580d1f7
LP
162 return 1;
163}
164
3a19f215
FB
165static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175}
176
177
178static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 179 JournalStorage *js;
57f443a6 180 int r;
8580d1f7
LP
181
182 assert(s);
183
266a4700 184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
d025f1e4
ZJS
194}
195
cba5629e
FB
196void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
cba5629e
FB
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
57f443a6 206 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
207 return;
208
209 metrics = &storage->metrics;
23aba343 210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
217 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
23aba343 222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235}
236
5c3bde3f 237static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 238#ifdef HAVE_ACL
5c3bde3f 239 int r;
d025f1e4 240#endif
d025f1e4
ZJS
241 assert(f);
242
d025f1e4 243#ifdef HAVE_ACL
34c10968 244 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
245 return;
246
5c3bde3f
ZJS
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
250#endif
251}
252
7a24f3bf
VC
253static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
7a24f3bf
VC
260 JournalFile **ret) {
261 int r;
e167d7fd 262 JournalFile *f;
7a24f3bf
VC
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
b58c888f 269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 270 else
5d1ce257 271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
272 if (r < 0)
273 return r;
274
e167d7fd 275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 276 if (r < 0) {
69a3a6fd 277 (void) journal_file_close(f);
7a24f3bf
VC
278 return r;
279 }
280
e167d7fd 281 *ret = f;
7a24f3bf
VC
282 return r;
283}
284
6431c7e2
VC
/* Returns true if the flag file /run/systemd/journal/flushed exists, false otherwise (including when the check
 * itself fails). */
static bool flushed_flag_is_set(void) {
        return access("/run/systemd/journal/flushed", F_OK) >= 0;
}
288
105bdb46 289static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 290 bool flushed = false;
105bdb46
VC
291 const char *fn;
292 int r = 0;
293
294 if (!s->system_journal &&
295 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 296 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
297
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
300 *
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
303
304 if (s->storage == STORAGE_PERSISTENT)
305 (void) mkdir_p("/var/log/journal/", 0755);
306
266a4700 307 (void) mkdir(s->system_storage.path, 0755);
105bdb46 308
266a4700
FB
309 fn = strjoina(s->system_storage.path, "/system.journal");
310 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
311 if (r >= 0) {
312 server_add_acls(s->system_journal, 0);
57f443a6 313 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 314 patch_min_use(&s->system_storage);
105bdb46
VC
315 } else if (r < 0) {
316 if (r != -ENOENT && r != -EROFS)
317 log_warning_errno(r, "Failed to open system journal: %m");
318
319 r = 0;
320 }
929eeb54
VC
321
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
325 *
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
328 */
329 if (s->runtime_journal && flushed)
330 (void) server_flush_to_var(s);
105bdb46
VC
331 }
332
333 if (!s->runtime_journal &&
334 (s->storage != STORAGE_NONE)) {
335
266a4700 336 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
337
338 if (s->system_journal) {
339
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
343
266a4700 344 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
345 if (r < 0) {
346 if (r != -ENOENT)
347 log_warning_errno(r, "Failed to open runtime journal: %m");
348
349 r = 0;
350 }
351
352 } else {
353
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
356
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn, 0750);
360
266a4700 361 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
362 if (r < 0)
363 return log_error_errno(r, "Failed to open runtime journal: %m");
364 }
365
366 if (s->runtime_journal) {
367 server_add_acls(s->runtime_journal, 0);
57f443a6 368 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 369 patch_min_use(&s->runtime_storage);
105bdb46
VC
370 }
371 }
372
373 return r;
374}
375
d025f1e4 376static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 377 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
378 int r;
379 JournalFile *f;
380 sd_id128_t machine;
381
382 assert(s);
383
105bdb46
VC
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
387 *
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
391 *
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s, false);
394
d025f1e4
ZJS
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
399
400 if (s->runtime_journal)
401 return s->runtime_journal;
402
61755fda 403 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
404 return s->system_journal;
405
406 r = sd_id128_get_machine(&machine);
407 if (r < 0)
408 return s->system_journal;
409
4a0b58c4 410 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
411 if (f)
412 return f;
413
de0671ee
ZJS
414 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
415 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
416 return s->system_journal;
417
43cf8388 418 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 419 /* Too many open? Then let's close one */
43cf8388 420 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 421 assert(f);
69a3a6fd 422 (void) journal_file_close(f);
d025f1e4
ZJS
423 }
424
266a4700 425 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
426 if (r < 0)
427 return s->system_journal;
428
5c3bde3f 429 server_add_acls(f, uid);
d025f1e4 430
4a0b58c4 431 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 432 if (r < 0) {
69a3a6fd 433 (void) journal_file_close(f);
d025f1e4
ZJS
434 return s->system_journal;
435 }
436
437 return f;
438}
439
ea69bd41
LP
440static int do_rotate(
441 Server *s,
442 JournalFile **f,
443 const char* name,
444 bool seal,
445 uint32_t uid) {
446
fc55baee
ZJS
447 int r;
448 assert(s);
449
450 if (!*f)
451 return -EINVAL;
452
b58c888f 453 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
454 if (r < 0)
455 if (*f)
ea69bd41 456 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 457 else
ea69bd41 458 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 459 else
5c3bde3f 460 server_add_acls(*f, uid);
2678031a 461
fc55baee
ZJS
462 return r;
463}
464
d025f1e4
ZJS
465void server_rotate(Server *s) {
466 JournalFile *f;
467 void *k;
468 Iterator i;
469 int r;
470
471 log_debug("Rotating...");
472
8580d1f7
LP
473 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
474 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 475
43cf8388 476 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 477 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 478 if (r >= 0)
43cf8388 479 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
480 else if (!f)
481 /* Old file has been closed and deallocated */
43cf8388 482 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 483 }
b58c888f
VC
484
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f, s->deferred_closes, i)
487 if (!journal_file_is_offlining(f)) {
488 (void) set_remove(s->deferred_closes, f);
489 (void) journal_file_close(f);
490 }
d025f1e4
ZJS
491}
492
26687bf8
OS
493void server_sync(Server *s) {
494 JournalFile *f;
26687bf8
OS
495 Iterator i;
496 int r;
497
26687bf8 498 if (s->system_journal) {
ac2e41f5 499 r = journal_file_set_offline(s->system_journal, false);
26687bf8 500 if (r < 0)
65089b82 501 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
502 }
503
65c1d46b 504 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 505 r = journal_file_set_offline(f, false);
26687bf8 506 if (r < 0)
65089b82 507 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
508 }
509
f9a810be
LP
510 if (s->sync_event_source) {
511 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
512 if (r < 0)
da927ba9 513 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 514 }
26687bf8
OS
515
516 s->sync_scheduled = false;
517}
518
3a19f215 519static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 520
63c8666b
ZJS
521 int r;
522
8580d1f7 523 assert(s);
266a4700 524 assert(storage);
8580d1f7 525
57f443a6 526 (void) cache_space_refresh(s, storage);
18e758bf
FB
527
528 if (verbose)
529 server_space_usage_message(s, storage);
8580d1f7 530
57f443a6
FB
531 r = journal_directory_vacuum(storage->path, storage->space.limit,
532 storage->metrics.n_max_files, s->max_retention_usec,
533 &s->oldest_file_usec, verbose);
63c8666b 534 if (r < 0 && r != -ENOENT)
266a4700
FB
535 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
536
a0edc477 537 cache_space_invalidate(&storage->space);
63c8666b
ZJS
538}
539
3a19f215 540int server_vacuum(Server *s, bool verbose) {
8580d1f7 541 assert(s);
d025f1e4
ZJS
542
543 log_debug("Vacuuming...");
544
545 s->oldest_file_usec = 0;
546
266a4700 547 if (s->system_journal)
3a19f215 548 do_vacuum(s, &s->system_storage, verbose);
266a4700 549 if (s->runtime_journal)
3a19f215 550 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 551
8580d1f7 552 return 0;
d025f1e4
ZJS
553}
554
0c24bb23
LP
555static void server_cache_machine_id(Server *s) {
556 sd_id128_t id;
557 int r;
558
559 assert(s);
560
561 r = sd_id128_get_machine(&id);
562 if (r < 0)
563 return;
564
565 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
566}
567
568static void server_cache_boot_id(Server *s) {
569 sd_id128_t id;
570 int r;
571
572 assert(s);
573
574 r = sd_id128_get_boot(&id);
575 if (r < 0)
576 return;
577
578 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
579}
580
581static void server_cache_hostname(Server *s) {
582 _cleanup_free_ char *t = NULL;
583 char *x;
584
585 assert(s);
586
587 t = gethostname_malloc();
588 if (!t)
589 return;
590
591 x = strappend("_HOSTNAME=", t);
592 if (!x)
593 return;
594
595 free(s->hostname_field);
596 s->hostname_field = x;
597}
598
8531ae70 599static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 600 switch(r) {
ae739cc1 601
6e1045e5
ZJS
602 case -E2BIG: /* Hit configured limit */
603 case -EFBIG: /* Hit fs limit */
604 case -EDQUOT: /* Quota limit hit */
605 case -ENOSPC: /* Disk full */
d025f1e4 606 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 607 return true;
ae739cc1 608
6e1045e5
ZJS
609 case -EIO: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f->path);
611 return true;
ae739cc1 612
6e1045e5 613 case -EHOSTDOWN: /* Other machine */
d025f1e4 614 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 615 return true;
ae739cc1 616
6e1045e5 617 case -EBUSY: /* Unclean shutdown */
d025f1e4 618 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 619 return true;
ae739cc1 620
6e1045e5 621 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 622 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 623 return true;
ae739cc1 624
6e1045e5
ZJS
625 case -EBADMSG: /* Corrupted */
626 case -ENODATA: /* Truncated */
627 case -ESHUTDOWN: /* Already archived */
d025f1e4 628 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 629 return true;
ae739cc1 630
6e1045e5 631 case -EIDRM: /* Journal file has been deleted */
2678031a 632 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1
LP
634
635 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 636 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
637 return true;
638
6e1045e5 639 default:
d025f1e4 640 return false;
6e1045e5 641 }
d025f1e4
ZJS
642}
643
d07f7b9e 644static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 645 bool vacuumed = false, rotate = false;
0f972d66 646 struct dual_timestamp ts;
d025f1e4 647 JournalFile *f;
d025f1e4
ZJS
648 int r;
649
650 assert(s);
651 assert(iovec);
652 assert(n > 0);
653
0f972d66
LP
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
658 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
659
7c070017
LP
660 if (ts.realtime < s->last_realtime_clock) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
d025f1e4 665
7c070017
LP
666 log_debug("Time jumped backwards, rotating.");
667 rotate = true;
668 } else {
669
670 f = find_journal(s, uid);
671 if (!f)
672 return;
673
674 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
676 rotate = true;
677 }
678 }
d025f1e4 679
7c070017 680 if (rotate) {
d025f1e4 681 server_rotate(s);
3a19f215 682 server_vacuum(s, false);
d025f1e4
ZJS
683 vacuumed = true;
684
685 f = find_journal(s, uid);
686 if (!f)
687 return;
688 }
689
7c070017
LP
690 s->last_realtime_clock = ts.realtime;
691
0f972d66 692 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 693 if (r >= 0) {
d07f7b9e 694 server_schedule_sync(s, priority);
d025f1e4 695 return;
26687bf8 696 }
d025f1e4
ZJS
697
698 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 699 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
700 return;
701 }
702
703 server_rotate(s);
3a19f215 704 server_vacuum(s, false);
d025f1e4
ZJS
705
706 f = find_journal(s, uid);
707 if (!f)
708 return;
709
710 log_debug("Retrying write.");
0f972d66 711 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
712 if (r < 0)
713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
714 else
d07f7b9e 715 server_schedule_sync(s, priority);
d025f1e4
ZJS
716}
717
4b58153d
LP
718static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
719 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
720 char *copy, ids[SD_ID128_STRING_MAX];
721 int r;
722
723 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
724 * on the cgroup path. */
725
726 r = cg_slice_to_path(slice, &slice_path);
727 if (r < 0)
728 return r;
729
730 escaped = cg_escape(unit);
731 if (!escaped)
732 return -ENOMEM;
733
605405c6 734 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
4b58153d
LP
735 if (!p)
736 return -ENOMEM;
737
738 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
739 if (r < 0)
740 return r;
741 if (r != 32)
742 return -EINVAL;
743 ids[32] = 0;
744
745 if (!id128_is_valid(ids))
746 return -EINVAL;
747
748 copy = strdup(ids);
749 if (!copy)
750 return -ENOMEM;
751
752 *ret = copy;
753 return 0;
754}
755
d025f1e4
ZJS
756static void dispatch_message_real(
757 Server *s,
758 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
759 const struct ucred *ucred,
760 const struct timeval *tv,
d025f1e4 761 const char *label, size_t label_len,
968f3196 762 const char *unit_id,
d07f7b9e 763 int priority,
968f3196 764 pid_t object_pid) {
d025f1e4 765
968f3196 766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
968f3196 776 char *x;
d025f1e4 777 int r;
ae018d9b 778 char *t, *c;
82499507
LP
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
ae018d9b 781#ifdef HAVE_AUDIT
968f3196
ZJS
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
786
787 uint32_t audit;
788 uid_t loginuid;
789#endif
d025f1e4
ZJS
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
d473176a 794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
795
796 if (ucred) {
d025f1e4
ZJS
797 realuid = ucred->uid;
798
de0671ee 799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 800 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 801
de0671ee 802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 803 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 804
de0671ee 805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 806 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
63c372cb 810 x = strjoina("_COMM=", t);
d025f1e4 811 free(t);
968f3196 812 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
63c372cb 817 x = strjoina("_EXE=", t);
d025f1e4 818 free(t);
968f3196 819 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
820 }
821
9bdbc2e2 822 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 823 if (r >= 0) {
63c372cb 824 x = strjoina("_CMDLINE=", t);
d025f1e4 825 free(t);
3a832116
SL
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
63c372cb 831 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 832 free(t);
968f3196 833 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
834 }
835
0a20e3c1 836#ifdef HAVE_AUDIT
d025f1e4 837 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 838 if (r >= 0) {
de0671ee 839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
d025f1e4
ZJS
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 844 if (r >= 0) {
de0671ee 845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 847 }
ae018d9b 848#endif
d025f1e4 849
e9174f29 850 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 851 if (r >= 0) {
4b58153d 852 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
853 char *session = NULL;
854
63c372cb 855 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 856 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 857
ae018d9b
LP
858 r = cg_path_get_session(c, &t);
859 if (r >= 0) {
63c372cb 860 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 861 free(t);
d025f1e4 862 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
863 }
864
865 if (cg_path_get_owner_uid(c, &owner) >= 0) {
866 owner_valid = true;
d025f1e4 867
de0671ee 868 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 869 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 870 }
d025f1e4 871
4b58153d
LP
872 if (cg_path_get_unit(c, &raw_unit) >= 0) {
873 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
874 IOVEC_SET_STRING(iovec[n++], x);
875 } else if (unit_id && !session) {
63c372cb 876 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
877 IOVEC_SET_STRING(iovec[n++], x);
878 }
879
880 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 881 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 882 free(t);
968f3196 883 IOVEC_SET_STRING(iovec[n++], x);
19cace37 884 } else if (unit_id && session) {
63c372cb 885 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
886 IOVEC_SET_STRING(iovec[n++], x);
887 }
ae018d9b 888
4b58153d
LP
889 if (cg_path_get_slice(c, &raw_slice) >= 0) {
890 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
893
d473176a
LP
894 if (cg_path_get_user_slice(c, &t) >= 0) {
895 x = strjoina("_SYSTEMD_USER_SLICE=", t);
896 free(t);
897 IOVEC_SET_STRING(iovec[n++], x);
898 }
899
4b58153d
LP
900 if (raw_slice && raw_unit) {
901 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
902 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
903 free(t);
904 IOVEC_SET_STRING(iovec[n++], x);
905 }
906 }
907
ae018d9b 908 free(c);
2d43b190 909 } else if (unit_id) {
63c372cb 910 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 911 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 912 }
d025f1e4 913
d025f1e4 914#ifdef HAVE_SELINUX
6355e756 915 if (mac_selinux_have()) {
d682b3a7 916 if (label) {
f8294e41 917 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 918
d682b3a7
LP
919 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
920 IOVEC_SET_STRING(iovec[n++], x);
921 } else {
2ed96880 922 char *con;
d025f1e4 923
d682b3a7 924 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 925 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 926
d682b3a7
LP
927 freecon(con);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
d025f1e4
ZJS
930 }
931 }
932#endif
933 }
968f3196
ZJS
934 assert(n <= m);
935
936 if (object_pid) {
937 r = get_process_uid(object_pid, &object_uid);
938 if (r >= 0) {
de0671ee 939 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
940 IOVEC_SET_STRING(iovec[n++], o_uid);
941 }
942
943 r = get_process_gid(object_pid, &object_gid);
944 if (r >= 0) {
de0671ee 945 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
946 IOVEC_SET_STRING(iovec[n++], o_gid);
947 }
948
949 r = get_process_comm(object_pid, &t);
950 if (r >= 0) {
63c372cb 951 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
952 free(t);
953 IOVEC_SET_STRING(iovec[n++], x);
954 }
955
956 r = get_process_exe(object_pid, &t);
957 if (r >= 0) {
63c372cb 958 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
959 free(t);
960 IOVEC_SET_STRING(iovec[n++], x);
961 }
962
963 r = get_process_cmdline(object_pid, 0, false, &t);
964 if (r >= 0) {
63c372cb 965 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
966 free(t);
967 IOVEC_SET_STRING(iovec[n++], x);
968 }
969
970#ifdef HAVE_AUDIT
971 r = audit_session_from_pid(object_pid, &audit);
972 if (r >= 0) {
de0671ee 973 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
974 IOVEC_SET_STRING(iovec[n++], o_audit_session);
975 }
976
977 r = audit_loginuid_from_pid(object_pid, &loginuid);
978 if (r >= 0) {
de0671ee 979 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
980 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
981 }
982#endif
983
e9174f29 984 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 985 if (r >= 0) {
63c372cb 986 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
987 IOVEC_SET_STRING(iovec[n++], x);
988
989 r = cg_path_get_session(c, &t);
990 if (r >= 0) {
63c372cb 991 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
992 free(t);
993 IOVEC_SET_STRING(iovec[n++], x);
994 }
995
996 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 997 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
998 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
999 }
1000
1001 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1002 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1003 free(t);
19cace37
LP
1004 IOVEC_SET_STRING(iovec[n++], x);
1005 }
1006
1007 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1008 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1009 free(t);
968f3196 1010 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1011 }
968f3196 1012
d473176a
LP
1013 if (cg_path_get_slice(c, &t) >= 0) {
1014 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1015 free(t);
1016 IOVEC_SET_STRING(iovec[n++], x);
1017 }
1018
1019 if (cg_path_get_user_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
968f3196
ZJS
1025 free(c);
1026 }
1027 }
1028 assert(n <= m);
d025f1e4
ZJS
1029
1030 if (tv) {
398a50cd 1031 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1032 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1033 }
1034
1035 /* Note that strictly speaking storing the boot id here is
1036 * redundant since the entry includes this in-line
1037 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1038 if (!isempty(s->boot_id_field))
1039 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1040
0c24bb23
LP
1041 if (!isempty(s->machine_id_field))
1042 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1043
0c24bb23
LP
1044 if (!isempty(s->hostname_field))
1045 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1046
1047 assert(n <= m);
1048
da499392 1049 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1050 /* Split up strictly by any UID */
759c945a 1051 journal_uid = realuid;
82499507 1052 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1053 /* Split up by login UIDs. We do this only if the
1054 * realuid is not root, in order not to accidentally
1055 * leak privileged information to the user that is
1056 * logged by a privileged process that is part of an
7517e174 1057 * unprivileged session. */
8a0889df 1058 journal_uid = owner;
da499392
KS
1059 else
1060 journal_uid = 0;
759c945a 1061
d07f7b9e 1062 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1063}
1064
1065void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1066 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1067 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1068 unsigned n = 0, m;
32917e33 1069 int r;
d025f1e4 1070 va_list ap;
b92bea5d 1071 struct ucred ucred = {};
d025f1e4
ZJS
1072
1073 assert(s);
1074 assert(format);
1075
4850d39a 1076 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1077 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1078 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1079
d025f1e4 1080 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1081 assert_cc(6 == LOG_INFO);
32917e33 1082 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1083
3bbaff3e 1084 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1085 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1086 IOVEC_SET_STRING(iovec[n++], mid);
1087 }
1088
8a03c9ef
ZJS
1089 m = n;
1090
1091 va_start(ap, format);
32917e33
ZJS
1092 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1093 /* Error handling below */
8a03c9ef
ZJS
1094 va_end(ap);
1095
d025f1e4
ZJS
1096 ucred.pid = getpid();
1097 ucred.uid = getuid();
1098 ucred.gid = getgid();
1099
32917e33
ZJS
1100 if (r >= 0)
1101 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1102
1103 while (m < n)
1104 free(iovec[m++].iov_base);
32917e33
ZJS
1105
1106 if (r < 0) {
1107 /* We failed to format the message. Emit a warning instead. */
1108 char buf[LINE_MAX];
1109
1110 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1111
1112 n = 3;
1113 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1114 IOVEC_SET_STRING(iovec[n++], buf);
1115 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1116 }
d025f1e4
ZJS
1117}
1118
1119void server_dispatch_message(
1120 Server *s,
1121 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1122 const struct ucred *ucred,
1123 const struct timeval *tv,
d025f1e4
ZJS
1124 const char *label, size_t label_len,
1125 const char *unit_id,
968f3196
ZJS
1126 int priority,
1127 pid_t object_pid) {
d025f1e4 1128
7027ff61 1129 int rl, r;
7fd1b19b 1130 _cleanup_free_ char *path = NULL;
8580d1f7 1131 uint64_t available = 0;
db91ea32 1132 char *c;
d025f1e4
ZJS
1133
1134 assert(s);
1135 assert(iovec || n == 0);
1136
1137 if (n == 0)
1138 return;
1139
1140 if (LOG_PRI(priority) > s->max_level_store)
1141 return;
1142
2f5df74a
HHPF
1143 /* Stop early in case the information will not be stored
1144 * in a journal. */
1145 if (s->storage == STORAGE_NONE)
1146 return;
1147
d025f1e4
ZJS
1148 if (!ucred)
1149 goto finish;
1150
e9174f29 1151 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1152 if (r < 0)
d025f1e4
ZJS
1153 goto finish;
1154
1155 /* example: /user/lennart/3/foobar
1156 * /system/dbus.service/foobar
1157 *
1158 * So let's cut of everything past the third /, since that is
1159 * where user directories start */
1160
1161 c = strchr(path, '/');
1162 if (c) {
1163 c = strchr(c+1, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c)
1167 *c = 0;
1168 }
1169 }
1170
3a19f215 1171 (void) determine_space(s, &available, NULL);
8580d1f7 1172 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1173 if (rl == 0)
d025f1e4 1174 return;
d025f1e4
ZJS
1175
1176 /* Write a suppression message if we suppressed something */
1177 if (rl > 1)
db91ea32 1178 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1179 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1180 NULL);
d025f1e4
ZJS
1181
1182finish:
d07f7b9e 1183 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1184}
1185
d025f1e4 1186int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1187 sd_id128_t machine;
1188 sd_journal *j = NULL;
fbb63411
LP
1189 char ts[FORMAT_TIMESPAN_MAX];
1190 usec_t start;
1191 unsigned n = 0;
1192 int r;
d025f1e4
ZJS
1193
1194 assert(s);
1195
1196 if (s->storage != STORAGE_AUTO &&
1197 s->storage != STORAGE_PERSISTENT)
1198 return 0;
1199
1200 if (!s->runtime_journal)
1201 return 0;
1202
8580d1f7 1203 (void) system_journal_open(s, true);
d025f1e4
ZJS
1204
1205 if (!s->system_journal)
1206 return 0;
1207
1208 log_debug("Flushing to /var...");
1209
fbb63411
LP
1210 start = now(CLOCK_MONOTONIC);
1211
d025f1e4 1212 r = sd_id128_get_machine(&machine);
00a16861 1213 if (r < 0)
d025f1e4 1214 return r;
d025f1e4
ZJS
1215
1216 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1219
93b73b06
LP
1220 sd_journal_set_data_threshold(j, 0);
1221
d025f1e4
ZJS
1222 SD_JOURNAL_FOREACH(j) {
1223 Object *o = NULL;
1224 JournalFile *f;
1225
1226 f = j->current_file;
1227 assert(f && f->current_offset > 0);
1228
fbb63411
LP
1229 n++;
1230
d025f1e4
ZJS
1231 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1232 if (r < 0) {
da927ba9 1233 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1234 goto finish;
1235 }
1236
1237 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1238 if (r >= 0)
1239 continue;
1240
1241 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1242 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1243 goto finish;
1244 }
1245
1246 server_rotate(s);
3a19f215 1247 server_vacuum(s, false);
d025f1e4 1248
253f59df
LP
1249 if (!s->system_journal) {
1250 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1251 r = -EIO;
1252 goto finish;
1253 }
1254
d025f1e4
ZJS
1255 log_debug("Retrying write.");
1256 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1257 if (r < 0) {
da927ba9 1258 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1259 goto finish;
1260 }
1261 }
1262
804ae586
LP
1263 r = 0;
1264
d025f1e4
ZJS
1265finish:
1266 journal_file_post_change(s->system_journal);
1267
804ae586 1268 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1269
1270 if (r >= 0)
c6878637 1271 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1272
763c7aa2 1273 sd_journal_close(j);
d025f1e4 1274
8a03c9ef
ZJS
1275 server_driver_message(s, SD_ID128_NULL,
1276 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1277 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1278 n),
1279 NULL);
fbb63411 1280
d025f1e4
ZJS
1281 return r;
1282}
1283
8531ae70 1284int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1285 Server *s = userdata;
a315ac4e
LP
1286 struct ucred *ucred = NULL;
1287 struct timeval *tv = NULL;
1288 struct cmsghdr *cmsg;
1289 char *label = NULL;
1290 size_t label_len = 0, m;
1291 struct iovec iovec;
1292 ssize_t n;
1293 int *fds = NULL, v = 0;
1294 unsigned n_fds = 0;
1295
1296 union {
1297 struct cmsghdr cmsghdr;
1298
1299 /* We use NAME_MAX space for the SELinux label
1300 * here. The kernel currently enforces no
1301 * limit, but according to suggestions from
1302 * the SELinux people this will change and it
1303 * will probably be identical to NAME_MAX. For
1304 * now we use that, but this should be updated
1305 * one day when the final limit is known. */
1306 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1307 CMSG_SPACE(sizeof(struct timeval)) +
1308 CMSG_SPACE(sizeof(int)) + /* fd */
1309 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1310 } control = {};
1311
1312 union sockaddr_union sa = {};
1313
1314 struct msghdr msghdr = {
1315 .msg_iov = &iovec,
1316 .msg_iovlen = 1,
1317 .msg_control = &control,
1318 .msg_controllen = sizeof(control),
1319 .msg_name = &sa,
1320 .msg_namelen = sizeof(sa),
1321 };
f9a810be 1322
d025f1e4 1323 assert(s);
875c2e22 1324 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1325
1326 if (revents != EPOLLIN) {
1327 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1328 return -EIO;
1329 }
1330
a315ac4e
LP
1331 /* Try to get the right size, if we can. (Not all
1332 * sockets support SIOCINQ, hence we just try, but
1333 * don't rely on it. */
1334 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1335
a315ac4e
LP
1336 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1337 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1338 (size_t) LINE_MAX,
1339 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1340
a315ac4e
LP
1341 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1342 return log_oom();
875c2e22 1343
a315ac4e
LP
1344 iovec.iov_base = s->buffer;
1345 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1346
a315ac4e
LP
1347 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1348 if (n < 0) {
1349 if (errno == EINTR || errno == EAGAIN)
1350 return 0;
875c2e22 1351
a315ac4e
LP
1352 return log_error_errno(errno, "recvmsg() failed: %m");
1353 }
875c2e22 1354
a315ac4e
LP
1355 CMSG_FOREACH(cmsg, &msghdr) {
1356
1357 if (cmsg->cmsg_level == SOL_SOCKET &&
1358 cmsg->cmsg_type == SCM_CREDENTIALS &&
1359 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1360 ucred = (struct ucred*) CMSG_DATA(cmsg);
1361 else if (cmsg->cmsg_level == SOL_SOCKET &&
1362 cmsg->cmsg_type == SCM_SECURITY) {
1363 label = (char*) CMSG_DATA(cmsg);
1364 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1365 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1366 cmsg->cmsg_type == SO_TIMESTAMP &&
1367 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1368 tv = (struct timeval*) CMSG_DATA(cmsg);
1369 else if (cmsg->cmsg_level == SOL_SOCKET &&
1370 cmsg->cmsg_type == SCM_RIGHTS) {
1371 fds = (int*) CMSG_DATA(cmsg);
1372 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1373 }
a315ac4e 1374 }
d025f1e4 1375
a315ac4e
LP
1376 /* And a trailing NUL, just in case */
1377 s->buffer[n] = 0;
1378
1379 if (fd == s->syslog_fd) {
1380 if (n > 0 && n_fds == 0)
1381 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1382 else if (n_fds > 0)
1383 log_warning("Got file descriptors via syslog socket. Ignoring.");
1384
1385 } else if (fd == s->native_fd) {
1386 if (n > 0 && n_fds == 0)
1387 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1388 else if (n == 0 && n_fds == 1)
1389 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1390 else if (n_fds > 0)
1391 log_warning("Got too many file descriptors via native socket. Ignoring.");
1392
1393 } else {
1394 assert(fd == s->audit_fd);
1395
1396 if (n > 0 && n_fds == 0)
1397 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1398 else if (n_fds > 0)
1399 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1400 }
a315ac4e
LP
1401
1402 close_many(fds, n_fds);
1403 return 0;
f9a810be 1404}
d025f1e4 1405
f9a810be
LP
1406static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1407 Server *s = userdata;
33d52ab9 1408 int r;
d025f1e4 1409
f9a810be 1410 assert(s);
d025f1e4 1411
94b65516 1412 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1413
929eeb54 1414 (void) server_flush_to_var(s);
f9a810be 1415 server_sync(s);
3a19f215 1416 server_vacuum(s, false);
d025f1e4 1417
33d52ab9
LP
1418 r = touch("/run/systemd/journal/flushed");
1419 if (r < 0)
1420 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1421
18e758bf 1422 server_space_usage_message(s, NULL);
f9a810be
LP
1423 return 0;
1424}
d025f1e4 1425
f9a810be
LP
1426static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1427 Server *s = userdata;
33d52ab9 1428 int r;
d025f1e4 1429
f9a810be 1430 assert(s);
d025f1e4 1431
94b65516 1432 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1433 server_rotate(s);
3a19f215
FB
1434 server_vacuum(s, true);
1435
1436 if (s->system_journal)
1437 patch_min_use(&s->system_storage);
1438 if (s->runtime_journal)
1439 patch_min_use(&s->runtime_storage);
d025f1e4 1440
dbd6e31c 1441 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1442 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1443 if (r < 0)
1444 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1445
f9a810be
LP
1446 return 0;
1447}
d025f1e4 1448
f9a810be
LP
1449static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1450 Server *s = userdata;
d025f1e4 1451
f9a810be 1452 assert(s);
d025f1e4 1453
4daf54a8 1454 log_received_signal(LOG_INFO, si);
d025f1e4 1455
6203e07a 1456 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1457 return 0;
1458}
1459
94b65516
LP
1460static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1461 Server *s = userdata;
33d52ab9 1462 int r;
94b65516
LP
1463
1464 assert(s);
1465
1466 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1467
1468 server_sync(s);
1469
1470 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1471 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1472 if (r < 0)
1473 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1474
1475 return 0;
1476}
1477
f9a810be 1478static int setup_signals(Server *s) {
f9a810be 1479 int r;
d025f1e4
ZJS
1480
1481 assert(s);
1482
9bab3b65 1483 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1484
151b9b96 1485 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1486 if (r < 0)
1487 return r;
1488
151b9b96 1489 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1490 if (r < 0)
1491 return r;
d025f1e4 1492
151b9b96 1493 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1494 if (r < 0)
1495 return r;
d025f1e4 1496
b374689c
LP
1497 /* Let's process SIGTERM late, so that we flush all queued
1498 * messages to disk before we exit */
1499 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1500 if (r < 0)
1501 return r;
1502
1503 /* When journald is invoked on the terminal (when debugging),
1504 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1505 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1506 if (r < 0)
1507 return r;
d025f1e4 1508
b374689c
LP
1509 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1510 if (r < 0)
1511 return r;
1512
94b65516
LP
1513 /* SIGRTMIN+1 causes an immediate sync. We process this very
1514 * late, so that everything else queued at this point is
1515 * really written to disk. Clients can watch
1516 * /run/systemd/journal/synced with inotify until its mtime
1517 * changes to see when a sync happened. */
1518 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1519 if (r < 0)
1520 return r;
1521
1522 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1523 if (r < 0)
1524 return r;
1525
d025f1e4
ZJS
1526 return 0;
1527}
1528
5707ecf3
ZJS
1529static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1530 Server *s = data;
74df0fca 1531 int r;
d025f1e4 1532
5707ecf3 1533 assert(s);
d025f1e4 1534
1d84ad94
LP
1535 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1536
5707ecf3 1537 r = value ? parse_boolean(value) : true;
d581d9d9 1538 if (r < 0)
5707ecf3
ZJS
1539 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1540 else
1541 s->forward_to_syslog = r;
1d84ad94
LP
1542
1543 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1544
5707ecf3
ZJS
1545 r = value ? parse_boolean(value) : true;
1546 if (r < 0)
1547 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1548 else
1549 s->forward_to_kmsg = r;
1d84ad94
LP
1550
1551 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1552
5707ecf3
ZJS
1553 r = value ? parse_boolean(value) : true;
1554 if (r < 0)
1555 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1556 else
1557 s->forward_to_console = r;
1d84ad94
LP
1558
1559 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1560
5707ecf3
ZJS
1561 r = value ? parse_boolean(value) : true;
1562 if (r < 0)
1563 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1564 else
1565 s->forward_to_wall = r;
1d84ad94
LP
1566
1567 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1568
1569 if (proc_cmdline_value_missing(key, value))
1570 return 0;
1571
5707ecf3
ZJS
1572 r = log_level_from_string(value);
1573 if (r < 0)
1574 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1575 else
1576 s->max_level_console = r;
1d84ad94
LP
1577
1578 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1579
1580 if (proc_cmdline_value_missing(key, value))
1581 return 0;
1582
5707ecf3
ZJS
1583 r = log_level_from_string(value);
1584 if (r < 0)
1585 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1586 else
1587 s->max_level_store = r;
1d84ad94
LP
1588
1589 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1590
1591 if (proc_cmdline_value_missing(key, value))
1592 return 0;
1593
5707ecf3
ZJS
1594 r = log_level_from_string(value);
1595 if (r < 0)
1596 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1597 else
1598 s->max_level_syslog = r;
1d84ad94
LP
1599
1600 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1601
1602 if (proc_cmdline_value_missing(key, value))
1603 return 0;
1604
5707ecf3
ZJS
1605 r = log_level_from_string(value);
1606 if (r < 0)
1607 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1608 else
1609 s->max_level_kmsg = r;
1d84ad94
LP
1610
1611 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1612
1613 if (proc_cmdline_value_missing(key, value))
1614 return 0;
1615
5707ecf3
ZJS
1616 r = log_level_from_string(value);
1617 if (r < 0)
1618 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1619 else
1620 s->max_level_wall = r;
1d84ad94 1621
5707ecf3
ZJS
1622 } else if (startswith(key, "systemd.journald"))
1623 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1624
804ae586 1625 /* do not warn about state here, since probably systemd already did */
db91ea32 1626 return 0;
d025f1e4
ZJS
1627}
1628
1629static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1630 assert(s);
1631
43688c49 1632 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1633 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1634 "Journal\0",
1635 config_item_perf_lookup, journald_gperf_lookup,
1636 false, s);
d025f1e4
ZJS
1637}
1638
f9a810be
LP
1639static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1640 Server *s = userdata;
26687bf8
OS
1641
1642 assert(s);
1643
f9a810be 1644 server_sync(s);
26687bf8
OS
1645 return 0;
1646}
1647
d07f7b9e 1648int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1649 int r;
1650
26687bf8
OS
1651 assert(s);
1652
d07f7b9e
LP
1653 if (priority <= LOG_CRIT) {
1654 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1655 server_sync(s);
1656 return 0;
1657 }
1658
26687bf8
OS
1659 if (s->sync_scheduled)
1660 return 0;
1661
f9a810be
LP
1662 if (s->sync_interval_usec > 0) {
1663 usec_t when;
ca267016 1664
6a0f1f6d 1665 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1666 if (r < 0)
1667 return r;
26687bf8 1668
f9a810be
LP
1669 when += s->sync_interval_usec;
1670
1671 if (!s->sync_event_source) {
6a0f1f6d
LP
1672 r = sd_event_add_time(
1673 s->event,
1674 &s->sync_event_source,
1675 CLOCK_MONOTONIC,
1676 when, 0,
1677 server_dispatch_sync, s);
f9a810be
LP
1678 if (r < 0)
1679 return r;
1680
1681 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1682 } else {
1683 r = sd_event_source_set_time(s->sync_event_source, when);
1684 if (r < 0)
1685 return r;
1686
1687 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1688 }
26687bf8 1689 if (r < 0)
f9a810be 1690 return r;
26687bf8 1691
f9a810be
LP
1692 s->sync_scheduled = true;
1693 }
26687bf8
OS
1694
1695 return 0;
1696}
1697
0c24bb23
LP
1698static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1699 Server *s = userdata;
1700
1701 assert(s);
1702
1703 server_cache_hostname(s);
1704 return 0;
1705}
1706
1707static int server_open_hostname(Server *s) {
1708 int r;
1709
1710 assert(s);
1711
1712 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1713 if (s->hostname_fd < 0)
1714 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1715
151b9b96 1716 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1717 if (r < 0) {
28def94c
DR
1718 /* kernels prior to 3.2 don't support polling this file. Ignore
1719 * the failure. */
1720 if (r == -EPERM) {
e53fc357 1721 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1722 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1723 return 0;
1724 }
1725
23bbb0de 1726 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1727 }
1728
1729 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1730 if (r < 0)
1731 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1732
1733 return 0;
1734}
1735
e22aa3d3
LP
1736static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1737 Server *s = userdata;
1738 int r;
1739
1740 assert(s);
1741 assert(s->notify_event_source == es);
1742 assert(s->notify_fd == fd);
1743
e22aa3d3 1744 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1745 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1746 * READY=1 event or an stdout stream event. If there's nothing
1747 * to write anymore, turn our event source off. The next time
1748 * there's something to send it will be turned on again. */
e22aa3d3
LP
1749
1750 if (!s->sent_notify_ready) {
1751 static const char p[] =
1752 "READY=1\n"
1753 "STATUS=Processing requests...";
1754 ssize_t l;
1755
1756 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1757 if (l < 0) {
1758 if (errno == EAGAIN)
1759 return 0;
1760
1761 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1762 }
1763
1764 s->sent_notify_ready = true;
1765 log_debug("Sent READY=1 notification.");
1766
119e9655
LP
1767 } else if (s->send_watchdog) {
1768
1769 static const char p[] =
1770 "WATCHDOG=1";
1771
1772 ssize_t l;
1773
1774 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1775 if (l < 0) {
1776 if (errno == EAGAIN)
1777 return 0;
1778
1779 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1780 }
1781
1782 s->send_watchdog = false;
1783 log_debug("Sent WATCHDOG=1 notification.");
1784
e22aa3d3
LP
1785 } else if (s->stdout_streams_notify_queue)
1786 /* Dispatch one stream notification event */
1787 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1788
61233823 1789 /* Leave us enabled if there's still more to do. */
119e9655 1790 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1791 return 0;
1792
1793 /* There was nothing to do anymore, let's turn ourselves off. */
1794 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1795 if (r < 0)
1796 return log_error_errno(r, "Failed to turn off notify event source: %m");
1797
1798 return 0;
1799}
1800
119e9655
LP
1801static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1802 Server *s = userdata;
1803 int r;
1804
1805 assert(s);
1806
1807 s->send_watchdog = true;
1808
1809 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1810 if (r < 0)
1811 log_warning_errno(r, "Failed to turn on notify event source: %m");
1812
1813 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1814 if (r < 0)
1815 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1816
1817 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1818 if (r < 0)
1819 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1820
1821 return 0;
1822}
1823
e22aa3d3
LP
1824static int server_connect_notify(Server *s) {
1825 union sockaddr_union sa = {
1826 .un.sun_family = AF_UNIX,
1827 };
1828 const char *e;
1829 int r;
1830
1831 assert(s);
1832 assert(s->notify_fd < 0);
1833 assert(!s->notify_event_source);
1834
1835 /*
1836 So here's the problem: we'd like to send notification
1837 messages to PID 1, but we cannot do that via sd_notify(),
1838 since that's synchronous, and we might end up blocking on
1839 it. Specifically: given that PID 1 might block on
1840 dbus-daemon during IPC, and dbus-daemon is logging to us,
1841 and might hence block on us, we might end up in a deadlock
ccddd104 1842 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1843 generating a full blocking circle. To avoid this, let's
1844 create a non-blocking socket, and connect it to the
1845 notification socket, and then wait for POLLOUT before we
1846 send anything. This should efficiently avoid any deadlocks,
1847 as we'll never block on PID 1, hence PID 1 can safely block
1848 on dbus-daemon which can safely block on us again.
1849
1850 Don't think that this issue is real? It is, see:
1851 https://github.com/systemd/systemd/issues/1505
1852 */
1853
1854 e = getenv("NOTIFY_SOCKET");
1855 if (!e)
1856 return 0;
1857
1858 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1859 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1860 return -EINVAL;
1861 }
1862
1863 if (strlen(e) > sizeof(sa.un.sun_path)) {
1864 log_error("NOTIFY_SOCKET path too long: %s", e);
1865 return -EINVAL;
1866 }
1867
1868 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1869 if (s->notify_fd < 0)
1870 return log_error_errno(errno, "Failed to create notify socket: %m");
1871
1872 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1873
1874 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1875 if (sa.un.sun_path[0] == '@')
1876 sa.un.sun_path[0] = 0;
1877
fc2fffe7 1878 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1879 if (r < 0)
1880 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1881
1882 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1883 if (r < 0)
1884 return log_error_errno(r, "Failed to watch notification socket: %m");
1885
119e9655
LP
1886 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1887 s->send_watchdog = true;
1888
4de2402b 1889 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1890 if (r < 0)
1891 return log_error_errno(r, "Failed to add watchdog time event: %m");
1892 }
1893
e22aa3d3
LP
1894 /* This should fire pretty soon, which we'll use to send the
1895 * READY=1 event. */
1896
1897 return 0;
1898}
1899
d025f1e4 1900int server_init(Server *s) {
13790add 1901 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1902 int n, r, fd;
7d18d348 1903 bool no_sockets;
d025f1e4
ZJS
1904
1905 assert(s);
1906
1907 zero(*s);
e22aa3d3 1908 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1909 s->compress = true;
1910 s->seal = true;
1911
119e9655
LP
1912 s->watchdog_usec = USEC_INFINITY;
1913
26687bf8
OS
1914 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1915 s->sync_scheduled = false;
1916
d025f1e4
ZJS
1917 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1918 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1919
40b71e89 1920 s->forward_to_wall = true;
d025f1e4 1921
e150e820
MB
1922 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1923
d025f1e4
ZJS
1924 s->max_level_store = LOG_DEBUG;
1925 s->max_level_syslog = LOG_DEBUG;
1926 s->max_level_kmsg = LOG_NOTICE;
1927 s->max_level_console = LOG_INFO;
40b71e89 1928 s->max_level_wall = LOG_EMERG;
d025f1e4 1929
266a4700
FB
1930 journal_reset_metrics(&s->system_storage.metrics);
1931 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1932
1933 server_parse_config_file(s);
1d84ad94
LP
1934
1935 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1936 if (r < 0)
1937 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1938
d288f79f 1939 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1940 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1941 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1942 s->rate_limit_interval = s->rate_limit_burst = 0;
1943 }
d025f1e4 1944
8580d1f7 1945 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1946
43cf8388 1947 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1948 if (!s->user_journals)
1949 return log_oom();
1950
1951 s->mmap = mmap_cache_new();
1952 if (!s->mmap)
1953 return log_oom();
1954
b58c888f
VC
1955 s->deferred_closes = set_new(NULL);
1956 if (!s->deferred_closes)
1957 return log_oom();
1958
f9a810be 1959 r = sd_event_default(&s->event);
23bbb0de
MS
1960 if (r < 0)
1961 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1962
1963 n = sd_listen_fds(true);
23bbb0de
MS
1964 if (n < 0)
1965 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1966
1967 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1968
1969 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1970
1971 if (s->native_fd >= 0) {
1972 log_error("Too many native sockets passed.");
1973 return -EINVAL;
1974 }
1975
1976 s->native_fd = fd;
1977
1978 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1979
1980 if (s->stdout_fd >= 0) {
1981 log_error("Too many stdout sockets passed.");
1982 return -EINVAL;
1983 }
1984
1985 s->stdout_fd = fd;
1986
03ee5c38
LP
1987 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1988 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1989
1990 if (s->syslog_fd >= 0) {
1991 log_error("Too many /dev/log sockets passed.");
1992 return -EINVAL;
1993 }
1994
1995 s->syslog_fd = fd;
1996
875c2e22
LP
1997 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1998
1999 if (s->audit_fd >= 0) {
2000 log_error("Too many audit sockets passed.");
2001 return -EINVAL;
2002 }
2003
2004 s->audit_fd = fd;
2005
4ec3cd73 2006 } else {
4ec3cd73 2007
13790add
LP
2008 if (!fds) {
2009 fds = fdset_new();
2010 if (!fds)
2011 return log_oom();
2012 }
4ec3cd73 2013
13790add
LP
2014 r = fdset_put(fds, fd);
2015 if (r < 0)
2016 return log_oom();
4ec3cd73 2017 }
d025f1e4
ZJS
2018 }
2019
15d91bff
ZJS
2020 /* Try to restore streams, but don't bother if this fails */
2021 (void) server_restore_streams(s, fds);
d025f1e4 2022
13790add
LP
2023 if (fdset_size(fds) > 0) {
2024 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
2025 fds = fdset_free(fds);
2026 }
2027
7d18d348
ZJS
2028 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
2029
2030 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
2031
2032 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
2033 r = server_open_stdout_socket(s);
2034 if (r < 0)
2035 return r;
2036
37b7affe 2037 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 2038 r = server_open_syslog_socket(s);
d025f1e4
ZJS
2039 if (r < 0)
2040 return r;
2041
37b7affe 2042 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 2043 r = server_open_native_socket(s);
d025f1e4
ZJS
2044 if (r < 0)
2045 return r;
2046
37b7affe 2047 /* /dev/ksmg */
d025f1e4
ZJS
2048 r = server_open_dev_kmsg(s);
2049 if (r < 0)
2050 return r;
2051
7d18d348
ZJS
2052 /* Unless we got *some* sockets and not audit, open audit socket */
2053 if (s->audit_fd >= 0 || no_sockets) {
2054 r = server_open_audit(s);
2055 if (r < 0)
2056 return r;
2057 }
875c2e22 2058
d025f1e4
ZJS
2059 r = server_open_kernel_seqnum(s);
2060 if (r < 0)
2061 return r;
2062
0c24bb23
LP
2063 r = server_open_hostname(s);
2064 if (r < 0)
2065 return r;
2066
f9a810be 2067 r = setup_signals(s);
d025f1e4
ZJS
2068 if (r < 0)
2069 return r;
2070
2071 s->udev = udev_new();
2072 if (!s->udev)
2073 return -ENOMEM;
2074
f9a810be 2075 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2076 if (!s->rate_limit)
2077 return -ENOMEM;
2078
e9174f29
LP
2079 r = cg_get_root_path(&s->cgroup_root);
2080 if (r < 0)
2081 return r;
2082
0c24bb23
LP
2083 server_cache_hostname(s);
2084 server_cache_boot_id(s);
2085 server_cache_machine_id(s);
2086
266a4700
FB
2087 s->runtime_storage.name = "Runtime journal";
2088 s->system_storage.name = "System journal";
2089
605405c6
ZJS
2090 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2091 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
2092 if (!s->runtime_storage.path || !s->system_storage.path)
2093 return -ENOMEM;
2094
e22aa3d3
LP
2095 (void) server_connect_notify(s);
2096
804ae586 2097 return system_journal_open(s, false);
d025f1e4
ZJS
2098}
2099
2100void server_maybe_append_tags(Server *s) {
2101#ifdef HAVE_GCRYPT
2102 JournalFile *f;
2103 Iterator i;
2104 usec_t n;
2105
2106 n = now(CLOCK_REALTIME);
2107
2108 if (s->system_journal)
2109 journal_file_maybe_append_tag(s->system_journal, n);
2110
43cf8388 2111 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2112 journal_file_maybe_append_tag(f, n);
2113#endif
2114}
2115
2116void server_done(Server *s) {
2117 JournalFile *f;
2118 assert(s);
2119
b58c888f
VC
2120 if (s->deferred_closes) {
2121 journal_file_close_set(s->deferred_closes);
2122 set_free(s->deferred_closes);
2123 }
2124
d025f1e4
ZJS
2125 while (s->stdout_streams)
2126 stdout_stream_free(s->stdout_streams);
2127
2128 if (s->system_journal)
69a3a6fd 2129 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2130
2131 if (s->runtime_journal)
69a3a6fd 2132 (void) journal_file_close(s->runtime_journal);
d025f1e4 2133
43cf8388 2134 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2135 (void) journal_file_close(f);
d025f1e4 2136
43cf8388 2137 ordered_hashmap_free(s->user_journals);
d025f1e4 2138
f9a810be
LP
2139 sd_event_source_unref(s->syslog_event_source);
2140 sd_event_source_unref(s->native_event_source);
2141 sd_event_source_unref(s->stdout_event_source);
2142 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2143 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2144 sd_event_source_unref(s->sync_event_source);
2145 sd_event_source_unref(s->sigusr1_event_source);
2146 sd_event_source_unref(s->sigusr2_event_source);
2147 sd_event_source_unref(s->sigterm_event_source);
2148 sd_event_source_unref(s->sigint_event_source);
94b65516 2149 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2150 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2151 sd_event_source_unref(s->notify_event_source);
119e9655 2152 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2153 sd_event_unref(s->event);
d025f1e4 2154
03e334a1
LP
2155 safe_close(s->syslog_fd);
2156 safe_close(s->native_fd);
2157 safe_close(s->stdout_fd);
2158 safe_close(s->dev_kmsg_fd);
875c2e22 2159 safe_close(s->audit_fd);
03e334a1 2160 safe_close(s->hostname_fd);
e22aa3d3 2161 safe_close(s->notify_fd);
0c24bb23 2162
d025f1e4
ZJS
2163 if (s->rate_limit)
2164 journal_rate_limit_free(s->rate_limit);
2165
2166 if (s->kernel_seqnum)
2167 munmap(s->kernel_seqnum, sizeof(uint64_t));
2168
2169 free(s->buffer);
2170 free(s->tty_path);
e9174f29 2171 free(s->cgroup_root);
99d0966e 2172 free(s->hostname_field);
d025f1e4
ZJS
2173
2174 if (s->mmap)
2175 mmap_cache_unref(s->mmap);
2176
3e044c49 2177 udev_unref(s->udev);
d025f1e4 2178}
8580d1f7
LP
2179
2180static const char* const storage_table[_STORAGE_MAX] = {
2181 [STORAGE_AUTO] = "auto",
2182 [STORAGE_VOLATILE] = "volatile",
2183 [STORAGE_PERSISTENT] = "persistent",
2184 [STORAGE_NONE] = "none"
2185};
2186
2187DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2188DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2189
2190static const char* const split_mode_table[_SPLIT_MAX] = {
2191 [SPLIT_LOGIN] = "login",
2192 [SPLIT_UID] = "uid",
2193 [SPLIT_NONE] = "none",
2194};
2195
2196DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2197DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");