]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
cgroup: properly check for ignore-notfound paths (#4803)
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
863a5610 74#include "syslog-util.h"
d025f1e4 75
d025f1e4
ZJS
/* Once this many per-user journal files are open, find_journal() closes one before opening another */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures are considered current before they are recomputed */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
e0ed6db9
FB
90static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
e0ed6db9
FB
94
95 assert(ret_used);
96 assert(ret_free);
97
266a4700 98 d = opendir(path);
e0ed6db9
FB
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 101 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127}
128
a0edc477
FB
129static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131}
132
57f443a6 133static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 134 JournalStorageSpace *space;
266a4700 135 JournalMetrics *metrics;
23aba343 136 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 137 usec_t ts;
e0ed6db9 138 int r;
d025f1e4 139
8580d1f7 140 assert(s);
266a4700 141
266a4700 142 metrics = &storage->metrics;
23aba343 143 space = &storage->space;
d025f1e4 144
8580d1f7 145 ts = now(CLOCK_MONOTONIC);
d025f1e4 146
57f443a6 147 if (space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
148 return 0;
149
23aba343 150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
151 if (r < 0)
152 return r;
d025f1e4 153
23aba343
FB
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
23aba343
FB
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
8580d1f7
LP
162 return 1;
163}
164
3a19f215
FB
165static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175}
176
177
178static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 179 JournalStorage *js;
57f443a6 180 int r;
8580d1f7
LP
181
182 assert(s);
183
266a4700 184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
d025f1e4
ZJS
194}
195
cba5629e
FB
196void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
cba5629e
FB
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
57f443a6 206 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
207 return;
208
209 metrics = &storage->metrics;
23aba343 210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
217 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
23aba343 222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235}
236
5c3bde3f 237static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 238#ifdef HAVE_ACL
5c3bde3f 239 int r;
d025f1e4 240#endif
d025f1e4
ZJS
241 assert(f);
242
d025f1e4 243#ifdef HAVE_ACL
34c10968 244 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
245 return;
246
5c3bde3f
ZJS
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
250#endif
251}
252
7a24f3bf
VC
253static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
7a24f3bf
VC
260 JournalFile **ret) {
261 int r;
e167d7fd 262 JournalFile *f;
7a24f3bf
VC
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
b58c888f 269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 270 else
5d1ce257 271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
272 if (r < 0)
273 return r;
274
e167d7fd 275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 276 if (r < 0) {
69a3a6fd 277 (void) journal_file_close(f);
7a24f3bf
VC
278 return r;
279 }
280
e167d7fd 281 *ret = f;
7a24f3bf
VC
282 return r;
283}
284
6431c7e2
VC
/* Tells whether the flag file /run/systemd/journal/flushed exists (as far as access() with F_OK can tell). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
105bdb46 289static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 290 bool flushed = false;
105bdb46
VC
291 const char *fn;
292 int r = 0;
293
294 if (!s->system_journal &&
295 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 296 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
297
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
300 *
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
303
304 if (s->storage == STORAGE_PERSISTENT)
305 (void) mkdir_p("/var/log/journal/", 0755);
306
266a4700 307 (void) mkdir(s->system_storage.path, 0755);
105bdb46 308
266a4700
FB
309 fn = strjoina(s->system_storage.path, "/system.journal");
310 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
311 if (r >= 0) {
312 server_add_acls(s->system_journal, 0);
57f443a6 313 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 314 patch_min_use(&s->system_storage);
105bdb46
VC
315 } else if (r < 0) {
316 if (r != -ENOENT && r != -EROFS)
317 log_warning_errno(r, "Failed to open system journal: %m");
318
319 r = 0;
320 }
929eeb54
VC
321
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
325 *
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
328 */
329 if (s->runtime_journal && flushed)
330 (void) server_flush_to_var(s);
105bdb46
VC
331 }
332
333 if (!s->runtime_journal &&
334 (s->storage != STORAGE_NONE)) {
335
266a4700 336 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
337
338 if (s->system_journal) {
339
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
343
266a4700 344 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
345 if (r < 0) {
346 if (r != -ENOENT)
347 log_warning_errno(r, "Failed to open runtime journal: %m");
348
349 r = 0;
350 }
351
352 } else {
353
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
356
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn, 0750);
360
266a4700 361 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
362 if (r < 0)
363 return log_error_errno(r, "Failed to open runtime journal: %m");
364 }
365
366 if (s->runtime_journal) {
367 server_add_acls(s->runtime_journal, 0);
57f443a6 368 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 369 patch_min_use(&s->runtime_storage);
105bdb46
VC
370 }
371 }
372
373 return r;
374}
375
d025f1e4 376static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 377 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
378 int r;
379 JournalFile *f;
380 sd_id128_t machine;
381
382 assert(s);
383
105bdb46
VC
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
387 *
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
391 *
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s, false);
394
d025f1e4
ZJS
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
399
400 if (s->runtime_journal)
401 return s->runtime_journal;
402
61755fda 403 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
404 return s->system_journal;
405
406 r = sd_id128_get_machine(&machine);
407 if (r < 0)
408 return s->system_journal;
409
4a0b58c4 410 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
411 if (f)
412 return f;
413
de0671ee
ZJS
414 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
415 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
416 return s->system_journal;
417
43cf8388 418 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 419 /* Too many open? Then let's close one */
43cf8388 420 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 421 assert(f);
69a3a6fd 422 (void) journal_file_close(f);
d025f1e4
ZJS
423 }
424
266a4700 425 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
426 if (r < 0)
427 return s->system_journal;
428
5c3bde3f 429 server_add_acls(f, uid);
d025f1e4 430
4a0b58c4 431 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 432 if (r < 0) {
69a3a6fd 433 (void) journal_file_close(f);
d025f1e4
ZJS
434 return s->system_journal;
435 }
436
437 return f;
438}
439
ea69bd41
LP
440static int do_rotate(
441 Server *s,
442 JournalFile **f,
443 const char* name,
444 bool seal,
445 uint32_t uid) {
446
fc55baee
ZJS
447 int r;
448 assert(s);
449
450 if (!*f)
451 return -EINVAL;
452
b58c888f 453 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
454 if (r < 0)
455 if (*f)
ea69bd41 456 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 457 else
ea69bd41 458 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 459 else
5c3bde3f 460 server_add_acls(*f, uid);
2678031a 461
fc55baee
ZJS
462 return r;
463}
464
d025f1e4
ZJS
465void server_rotate(Server *s) {
466 JournalFile *f;
467 void *k;
468 Iterator i;
469 int r;
470
471 log_debug("Rotating...");
472
8580d1f7
LP
473 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
474 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 475
43cf8388 476 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 477 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 478 if (r >= 0)
43cf8388 479 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
480 else if (!f)
481 /* Old file has been closed and deallocated */
43cf8388 482 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 483 }
b58c888f
VC
484
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f, s->deferred_closes, i)
487 if (!journal_file_is_offlining(f)) {
488 (void) set_remove(s->deferred_closes, f);
489 (void) journal_file_close(f);
490 }
d025f1e4
ZJS
491}
492
26687bf8
OS
493void server_sync(Server *s) {
494 JournalFile *f;
26687bf8
OS
495 Iterator i;
496 int r;
497
26687bf8 498 if (s->system_journal) {
ac2e41f5 499 r = journal_file_set_offline(s->system_journal, false);
26687bf8 500 if (r < 0)
65089b82 501 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
502 }
503
65c1d46b 504 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 505 r = journal_file_set_offline(f, false);
26687bf8 506 if (r < 0)
65089b82 507 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
508 }
509
f9a810be
LP
510 if (s->sync_event_source) {
511 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
512 if (r < 0)
da927ba9 513 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 514 }
26687bf8
OS
515
516 s->sync_scheduled = false;
517}
518
3a19f215 519static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 520
63c8666b
ZJS
521 int r;
522
8580d1f7 523 assert(s);
266a4700 524 assert(storage);
8580d1f7 525
57f443a6 526 (void) cache_space_refresh(s, storage);
18e758bf
FB
527
528 if (verbose)
529 server_space_usage_message(s, storage);
8580d1f7 530
57f443a6
FB
531 r = journal_directory_vacuum(storage->path, storage->space.limit,
532 storage->metrics.n_max_files, s->max_retention_usec,
533 &s->oldest_file_usec, verbose);
63c8666b 534 if (r < 0 && r != -ENOENT)
266a4700
FB
535 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
536
a0edc477 537 cache_space_invalidate(&storage->space);
63c8666b
ZJS
538}
539
3a19f215 540int server_vacuum(Server *s, bool verbose) {
8580d1f7 541 assert(s);
d025f1e4
ZJS
542
543 log_debug("Vacuuming...");
544
545 s->oldest_file_usec = 0;
546
266a4700 547 if (s->system_journal)
3a19f215 548 do_vacuum(s, &s->system_storage, verbose);
266a4700 549 if (s->runtime_journal)
3a19f215 550 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 551
8580d1f7 552 return 0;
d025f1e4
ZJS
553}
554
0c24bb23
LP
555static void server_cache_machine_id(Server *s) {
556 sd_id128_t id;
557 int r;
558
559 assert(s);
560
561 r = sd_id128_get_machine(&id);
562 if (r < 0)
563 return;
564
565 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
566}
567
568static void server_cache_boot_id(Server *s) {
569 sd_id128_t id;
570 int r;
571
572 assert(s);
573
574 r = sd_id128_get_boot(&id);
575 if (r < 0)
576 return;
577
578 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
579}
580
581static void server_cache_hostname(Server *s) {
582 _cleanup_free_ char *t = NULL;
583 char *x;
584
585 assert(s);
586
587 t = gethostname_malloc();
588 if (!t)
589 return;
590
591 x = strappend("_HOSTNAME=", t);
592 if (!x)
593 return;
594
595 free(s->hostname_field);
596 s->hostname_field = x;
597}
598
8531ae70 599static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 600 switch(r) {
ae739cc1 601
6e1045e5
ZJS
602 case -E2BIG: /* Hit configured limit */
603 case -EFBIG: /* Hit fs limit */
604 case -EDQUOT: /* Quota limit hit */
605 case -ENOSPC: /* Disk full */
d025f1e4 606 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 607 return true;
ae739cc1 608
6e1045e5
ZJS
609 case -EIO: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f->path);
611 return true;
ae739cc1 612
6e1045e5 613 case -EHOSTDOWN: /* Other machine */
d025f1e4 614 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 615 return true;
ae739cc1 616
6e1045e5 617 case -EBUSY: /* Unclean shutdown */
d025f1e4 618 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 619 return true;
ae739cc1 620
6e1045e5 621 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 622 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 623 return true;
ae739cc1 624
6e1045e5
ZJS
625 case -EBADMSG: /* Corrupted */
626 case -ENODATA: /* Truncated */
627 case -ESHUTDOWN: /* Already archived */
d025f1e4 628 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 629 return true;
ae739cc1 630
6e1045e5 631 case -EIDRM: /* Journal file has been deleted */
2678031a 632 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1
LP
634
635 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 636 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
637 return true;
638
6e1045e5 639 default:
d025f1e4 640 return false;
6e1045e5 641 }
d025f1e4
ZJS
642}
643
d07f7b9e 644static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 645 bool vacuumed = false, rotate = false;
0f972d66 646 struct dual_timestamp ts;
d025f1e4 647 JournalFile *f;
d025f1e4
ZJS
648 int r;
649
650 assert(s);
651 assert(iovec);
652 assert(n > 0);
653
0f972d66
LP
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
658 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
659
7c070017
LP
660 if (ts.realtime < s->last_realtime_clock) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
d025f1e4 665
7c070017
LP
666 log_debug("Time jumped backwards, rotating.");
667 rotate = true;
668 } else {
669
670 f = find_journal(s, uid);
671 if (!f)
672 return;
673
674 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
676 rotate = true;
677 }
678 }
d025f1e4 679
7c070017 680 if (rotate) {
d025f1e4 681 server_rotate(s);
3a19f215 682 server_vacuum(s, false);
d025f1e4
ZJS
683 vacuumed = true;
684
685 f = find_journal(s, uid);
686 if (!f)
687 return;
688 }
689
7c070017
LP
690 s->last_realtime_clock = ts.realtime;
691
0f972d66 692 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 693 if (r >= 0) {
d07f7b9e 694 server_schedule_sync(s, priority);
d025f1e4 695 return;
26687bf8 696 }
d025f1e4
ZJS
697
698 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 699 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
700 return;
701 }
702
703 server_rotate(s);
3a19f215 704 server_vacuum(s, false);
d025f1e4
ZJS
705
706 f = find_journal(s, uid);
707 if (!f)
708 return;
709
710 log_debug("Retrying write.");
0f972d66 711 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
712 if (r < 0)
713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
714 else
d07f7b9e 715 server_schedule_sync(s, priority);
d025f1e4
ZJS
716}
717
4b58153d
LP
718static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
719 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
720 char *copy, ids[SD_ID128_STRING_MAX];
721 int r;
722
723 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
724 * on the cgroup path. */
725
726 r = cg_slice_to_path(slice, &slice_path);
727 if (r < 0)
728 return r;
729
730 escaped = cg_escape(unit);
731 if (!escaped)
732 return -ENOMEM;
733
605405c6 734 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
4b58153d
LP
735 if (!p)
736 return -ENOMEM;
737
738 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
739 if (r < 0)
740 return r;
741 if (r != 32)
742 return -EINVAL;
743 ids[32] = 0;
744
745 if (!id128_is_valid(ids))
746 return -EINVAL;
747
748 copy = strdup(ids);
749 if (!copy)
750 return -ENOMEM;
751
752 *ret = copy;
753 return 0;
754}
755
d025f1e4
ZJS
756static void dispatch_message_real(
757 Server *s,
758 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
759 const struct ucred *ucred,
760 const struct timeval *tv,
d025f1e4 761 const char *label, size_t label_len,
968f3196 762 const char *unit_id,
d07f7b9e 763 int priority,
968f3196 764 pid_t object_pid) {
d025f1e4 765
968f3196 766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
968f3196 776 char *x;
d025f1e4 777 int r;
ae018d9b 778 char *t, *c;
82499507
LP
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
ae018d9b 781#ifdef HAVE_AUDIT
968f3196
ZJS
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
786
787 uint32_t audit;
788 uid_t loginuid;
789#endif
d025f1e4
ZJS
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
d473176a 794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
795
796 if (ucred) {
d025f1e4
ZJS
797 realuid = ucred->uid;
798
de0671ee 799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 800 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 801
de0671ee 802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 803 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 804
de0671ee 805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 806 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
63c372cb 810 x = strjoina("_COMM=", t);
d025f1e4 811 free(t);
968f3196 812 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
63c372cb 817 x = strjoina("_EXE=", t);
d025f1e4 818 free(t);
968f3196 819 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
820 }
821
9bdbc2e2 822 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 823 if (r >= 0) {
63c372cb 824 x = strjoina("_CMDLINE=", t);
d025f1e4 825 free(t);
3a832116
SL
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
63c372cb 831 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 832 free(t);
968f3196 833 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
834 }
835
0a20e3c1 836#ifdef HAVE_AUDIT
d025f1e4 837 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 838 if (r >= 0) {
de0671ee 839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
d025f1e4
ZJS
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 844 if (r >= 0) {
de0671ee 845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 847 }
ae018d9b 848#endif
d025f1e4 849
e9174f29 850 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 851 if (r >= 0) {
4b58153d 852 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
853 char *session = NULL;
854
63c372cb 855 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 856 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 857
ae018d9b
LP
858 r = cg_path_get_session(c, &t);
859 if (r >= 0) {
63c372cb 860 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 861 free(t);
d025f1e4 862 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
863 }
864
865 if (cg_path_get_owner_uid(c, &owner) >= 0) {
866 owner_valid = true;
d025f1e4 867
de0671ee 868 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 869 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 870 }
d025f1e4 871
4b58153d
LP
872 if (cg_path_get_unit(c, &raw_unit) >= 0) {
873 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
874 IOVEC_SET_STRING(iovec[n++], x);
875 } else if (unit_id && !session) {
63c372cb 876 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
877 IOVEC_SET_STRING(iovec[n++], x);
878 }
879
880 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 881 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 882 free(t);
968f3196 883 IOVEC_SET_STRING(iovec[n++], x);
19cace37 884 } else if (unit_id && session) {
63c372cb 885 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
886 IOVEC_SET_STRING(iovec[n++], x);
887 }
ae018d9b 888
4b58153d
LP
889 if (cg_path_get_slice(c, &raw_slice) >= 0) {
890 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
893
d473176a
LP
894 if (cg_path_get_user_slice(c, &t) >= 0) {
895 x = strjoina("_SYSTEMD_USER_SLICE=", t);
896 free(t);
897 IOVEC_SET_STRING(iovec[n++], x);
898 }
899
4b58153d
LP
900 if (raw_slice && raw_unit) {
901 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
902 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
903 free(t);
904 IOVEC_SET_STRING(iovec[n++], x);
905 }
906 }
907
ae018d9b 908 free(c);
2d43b190 909 } else if (unit_id) {
63c372cb 910 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 911 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 912 }
d025f1e4 913
d025f1e4 914#ifdef HAVE_SELINUX
6355e756 915 if (mac_selinux_have()) {
d682b3a7 916 if (label) {
f8294e41 917 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 918
d682b3a7
LP
919 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
920 IOVEC_SET_STRING(iovec[n++], x);
921 } else {
2ed96880 922 char *con;
d025f1e4 923
d682b3a7 924 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 925 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 926
d682b3a7
LP
927 freecon(con);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
d025f1e4
ZJS
930 }
931 }
932#endif
933 }
968f3196
ZJS
934 assert(n <= m);
935
936 if (object_pid) {
937 r = get_process_uid(object_pid, &object_uid);
938 if (r >= 0) {
de0671ee 939 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
940 IOVEC_SET_STRING(iovec[n++], o_uid);
941 }
942
943 r = get_process_gid(object_pid, &object_gid);
944 if (r >= 0) {
de0671ee 945 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
946 IOVEC_SET_STRING(iovec[n++], o_gid);
947 }
948
949 r = get_process_comm(object_pid, &t);
950 if (r >= 0) {
63c372cb 951 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
952 free(t);
953 IOVEC_SET_STRING(iovec[n++], x);
954 }
955
956 r = get_process_exe(object_pid, &t);
957 if (r >= 0) {
63c372cb 958 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
959 free(t);
960 IOVEC_SET_STRING(iovec[n++], x);
961 }
962
963 r = get_process_cmdline(object_pid, 0, false, &t);
964 if (r >= 0) {
63c372cb 965 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
966 free(t);
967 IOVEC_SET_STRING(iovec[n++], x);
968 }
969
970#ifdef HAVE_AUDIT
971 r = audit_session_from_pid(object_pid, &audit);
972 if (r >= 0) {
de0671ee 973 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
974 IOVEC_SET_STRING(iovec[n++], o_audit_session);
975 }
976
977 r = audit_loginuid_from_pid(object_pid, &loginuid);
978 if (r >= 0) {
de0671ee 979 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
980 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
981 }
982#endif
983
e9174f29 984 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 985 if (r >= 0) {
63c372cb 986 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
987 IOVEC_SET_STRING(iovec[n++], x);
988
989 r = cg_path_get_session(c, &t);
990 if (r >= 0) {
63c372cb 991 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
992 free(t);
993 IOVEC_SET_STRING(iovec[n++], x);
994 }
995
996 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 997 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
998 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
999 }
1000
1001 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1002 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1003 free(t);
19cace37
LP
1004 IOVEC_SET_STRING(iovec[n++], x);
1005 }
1006
1007 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1008 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1009 free(t);
968f3196 1010 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1011 }
968f3196 1012
d473176a
LP
1013 if (cg_path_get_slice(c, &t) >= 0) {
1014 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1015 free(t);
1016 IOVEC_SET_STRING(iovec[n++], x);
1017 }
1018
1019 if (cg_path_get_user_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
968f3196
ZJS
1025 free(c);
1026 }
1027 }
1028 assert(n <= m);
d025f1e4
ZJS
1029
1030 if (tv) {
398a50cd 1031 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1032 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1033 }
1034
1035 /* Note that strictly speaking storing the boot id here is
1036 * redundant since the entry includes this in-line
1037 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1038 if (!isempty(s->boot_id_field))
1039 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1040
0c24bb23
LP
1041 if (!isempty(s->machine_id_field))
1042 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1043
0c24bb23
LP
1044 if (!isempty(s->hostname_field))
1045 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1046
1047 assert(n <= m);
1048
da499392 1049 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1050 /* Split up strictly by any UID */
759c945a 1051 journal_uid = realuid;
82499507 1052 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1053 /* Split up by login UIDs. We do this only if the
1054 * realuid is not root, in order not to accidentally
1055 * leak privileged information to the user that is
1056 * logged by a privileged process that is part of an
7517e174 1057 * unprivileged session. */
8a0889df 1058 journal_uid = owner;
da499392
KS
1059 else
1060 journal_uid = 0;
759c945a 1061
d07f7b9e 1062 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1063}
1064
1065void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1066 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1067 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1068 unsigned n = 0, m;
32917e33 1069 int r;
d025f1e4 1070 va_list ap;
b92bea5d 1071 struct ucred ucred = {};
d025f1e4
ZJS
1072
1073 assert(s);
1074 assert(format);
1075
4850d39a 1076 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1077 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1078 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1079
d025f1e4 1080 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1081 assert_cc(6 == LOG_INFO);
32917e33 1082 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1083
3bbaff3e 1084 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1085 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1086 IOVEC_SET_STRING(iovec[n++], mid);
1087 }
1088
8a03c9ef
ZJS
1089 m = n;
1090
1091 va_start(ap, format);
32917e33
ZJS
1092 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1093 /* Error handling below */
8a03c9ef
ZJS
1094 va_end(ap);
1095
d025f1e4
ZJS
1096 ucred.pid = getpid();
1097 ucred.uid = getuid();
1098 ucred.gid = getgid();
1099
32917e33
ZJS
1100 if (r >= 0)
1101 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1102
1103 while (m < n)
1104 free(iovec[m++].iov_base);
32917e33
ZJS
1105
1106 if (r < 0) {
1107 /* We failed to format the message. Emit a warning instead. */
1108 char buf[LINE_MAX];
1109
1110 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1111
1112 n = 3;
1113 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1114 IOVEC_SET_STRING(iovec[n++], buf);
1115 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1116 }
d025f1e4
ZJS
1117}
1118
1119void server_dispatch_message(
1120 Server *s,
1121 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1122 const struct ucred *ucred,
1123 const struct timeval *tv,
d025f1e4
ZJS
1124 const char *label, size_t label_len,
1125 const char *unit_id,
968f3196
ZJS
1126 int priority,
1127 pid_t object_pid) {
d025f1e4 1128
7027ff61 1129 int rl, r;
7fd1b19b 1130 _cleanup_free_ char *path = NULL;
8580d1f7 1131 uint64_t available = 0;
db91ea32 1132 char *c;
d025f1e4
ZJS
1133
1134 assert(s);
1135 assert(iovec || n == 0);
1136
1137 if (n == 0)
1138 return;
1139
1140 if (LOG_PRI(priority) > s->max_level_store)
1141 return;
1142
2f5df74a
HHPF
1143 /* Stop early in case the information will not be stored
1144 * in a journal. */
1145 if (s->storage == STORAGE_NONE)
1146 return;
1147
d025f1e4
ZJS
1148 if (!ucred)
1149 goto finish;
1150
e9174f29 1151 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1152 if (r < 0)
d025f1e4
ZJS
1153 goto finish;
1154
1155 /* example: /user/lennart/3/foobar
1156 * /system/dbus.service/foobar
1157 *
1158 * So let's cut of everything past the third /, since that is
1159 * where user directories start */
1160
1161 c = strchr(path, '/');
1162 if (c) {
1163 c = strchr(c+1, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c)
1167 *c = 0;
1168 }
1169 }
1170
3a19f215 1171 (void) determine_space(s, &available, NULL);
8580d1f7 1172 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1173 if (rl == 0)
d025f1e4 1174 return;
d025f1e4
ZJS
1175
1176 /* Write a suppression message if we suppressed something */
1177 if (rl > 1)
db91ea32 1178 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1179 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1180 NULL);
d025f1e4
ZJS
1181
1182finish:
d07f7b9e 1183 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1184}
1185
d025f1e4 1186int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1187 sd_id128_t machine;
1188 sd_journal *j = NULL;
fbb63411
LP
1189 char ts[FORMAT_TIMESPAN_MAX];
1190 usec_t start;
1191 unsigned n = 0;
1192 int r;
d025f1e4
ZJS
1193
1194 assert(s);
1195
1196 if (s->storage != STORAGE_AUTO &&
1197 s->storage != STORAGE_PERSISTENT)
1198 return 0;
1199
1200 if (!s->runtime_journal)
1201 return 0;
1202
8580d1f7 1203 (void) system_journal_open(s, true);
d025f1e4
ZJS
1204
1205 if (!s->system_journal)
1206 return 0;
1207
1208 log_debug("Flushing to /var...");
1209
fbb63411
LP
1210 start = now(CLOCK_MONOTONIC);
1211
d025f1e4 1212 r = sd_id128_get_machine(&machine);
00a16861 1213 if (r < 0)
d025f1e4 1214 return r;
d025f1e4
ZJS
1215
1216 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1219
93b73b06
LP
1220 sd_journal_set_data_threshold(j, 0);
1221
d025f1e4
ZJS
1222 SD_JOURNAL_FOREACH(j) {
1223 Object *o = NULL;
1224 JournalFile *f;
1225
1226 f = j->current_file;
1227 assert(f && f->current_offset > 0);
1228
fbb63411
LP
1229 n++;
1230
d025f1e4
ZJS
1231 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1232 if (r < 0) {
da927ba9 1233 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1234 goto finish;
1235 }
1236
1237 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1238 if (r >= 0)
1239 continue;
1240
1241 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1242 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1243 goto finish;
1244 }
1245
1246 server_rotate(s);
3a19f215 1247 server_vacuum(s, false);
d025f1e4 1248
253f59df
LP
1249 if (!s->system_journal) {
1250 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1251 r = -EIO;
1252 goto finish;
1253 }
1254
d025f1e4
ZJS
1255 log_debug("Retrying write.");
1256 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1257 if (r < 0) {
da927ba9 1258 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1259 goto finish;
1260 }
1261 }
1262
804ae586
LP
1263 r = 0;
1264
d025f1e4
ZJS
1265finish:
1266 journal_file_post_change(s->system_journal);
1267
804ae586 1268 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1269
1270 if (r >= 0)
c6878637 1271 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1272
763c7aa2 1273 sd_journal_close(j);
d025f1e4 1274
8a03c9ef
ZJS
1275 server_driver_message(s, SD_ID128_NULL,
1276 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1277 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1278 n),
1279 NULL);
fbb63411 1280
d025f1e4
ZJS
1281 return r;
1282}
1283
8531ae70 1284int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1285 Server *s = userdata;
a315ac4e
LP
1286 struct ucred *ucred = NULL;
1287 struct timeval *tv = NULL;
1288 struct cmsghdr *cmsg;
1289 char *label = NULL;
1290 size_t label_len = 0, m;
1291 struct iovec iovec;
1292 ssize_t n;
1293 int *fds = NULL, v = 0;
1294 unsigned n_fds = 0;
1295
1296 union {
1297 struct cmsghdr cmsghdr;
1298
1299 /* We use NAME_MAX space for the SELinux label
1300 * here. The kernel currently enforces no
1301 * limit, but according to suggestions from
1302 * the SELinux people this will change and it
1303 * will probably be identical to NAME_MAX. For
1304 * now we use that, but this should be updated
1305 * one day when the final limit is known. */
1306 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1307 CMSG_SPACE(sizeof(struct timeval)) +
1308 CMSG_SPACE(sizeof(int)) + /* fd */
1309 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1310 } control = {};
1311
1312 union sockaddr_union sa = {};
1313
1314 struct msghdr msghdr = {
1315 .msg_iov = &iovec,
1316 .msg_iovlen = 1,
1317 .msg_control = &control,
1318 .msg_controllen = sizeof(control),
1319 .msg_name = &sa,
1320 .msg_namelen = sizeof(sa),
1321 };
f9a810be 1322
d025f1e4 1323 assert(s);
875c2e22 1324 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1325
1326 if (revents != EPOLLIN) {
1327 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1328 return -EIO;
1329 }
1330
a315ac4e
LP
1331 /* Try to get the right size, if we can. (Not all
1332 * sockets support SIOCINQ, hence we just try, but
1333 * don't rely on it. */
1334 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1335
a315ac4e
LP
1336 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1337 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1338 (size_t) LINE_MAX,
1339 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1340
a315ac4e
LP
1341 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1342 return log_oom();
875c2e22 1343
a315ac4e
LP
1344 iovec.iov_base = s->buffer;
1345 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1346
a315ac4e
LP
1347 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1348 if (n < 0) {
1349 if (errno == EINTR || errno == EAGAIN)
1350 return 0;
875c2e22 1351
a315ac4e
LP
1352 return log_error_errno(errno, "recvmsg() failed: %m");
1353 }
875c2e22 1354
a315ac4e
LP
1355 CMSG_FOREACH(cmsg, &msghdr) {
1356
1357 if (cmsg->cmsg_level == SOL_SOCKET &&
1358 cmsg->cmsg_type == SCM_CREDENTIALS &&
1359 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1360 ucred = (struct ucred*) CMSG_DATA(cmsg);
1361 else if (cmsg->cmsg_level == SOL_SOCKET &&
1362 cmsg->cmsg_type == SCM_SECURITY) {
1363 label = (char*) CMSG_DATA(cmsg);
1364 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1365 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1366 cmsg->cmsg_type == SO_TIMESTAMP &&
1367 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1368 tv = (struct timeval*) CMSG_DATA(cmsg);
1369 else if (cmsg->cmsg_level == SOL_SOCKET &&
1370 cmsg->cmsg_type == SCM_RIGHTS) {
1371 fds = (int*) CMSG_DATA(cmsg);
1372 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1373 }
a315ac4e 1374 }
d025f1e4 1375
a315ac4e
LP
1376 /* And a trailing NUL, just in case */
1377 s->buffer[n] = 0;
1378
1379 if (fd == s->syslog_fd) {
1380 if (n > 0 && n_fds == 0)
1381 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1382 else if (n_fds > 0)
1383 log_warning("Got file descriptors via syslog socket. Ignoring.");
1384
1385 } else if (fd == s->native_fd) {
1386 if (n > 0 && n_fds == 0)
1387 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1388 else if (n == 0 && n_fds == 1)
1389 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1390 else if (n_fds > 0)
1391 log_warning("Got too many file descriptors via native socket. Ignoring.");
1392
1393 } else {
1394 assert(fd == s->audit_fd);
1395
1396 if (n > 0 && n_fds == 0)
1397 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1398 else if (n_fds > 0)
1399 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1400 }
a315ac4e
LP
1401
1402 close_many(fds, n_fds);
1403 return 0;
f9a810be 1404}
d025f1e4 1405
f9a810be
LP
1406static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1407 Server *s = userdata;
33d52ab9 1408 int r;
d025f1e4 1409
f9a810be 1410 assert(s);
d025f1e4 1411
94b65516 1412 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1413
929eeb54 1414 (void) server_flush_to_var(s);
f9a810be 1415 server_sync(s);
3a19f215 1416 server_vacuum(s, false);
d025f1e4 1417
33d52ab9
LP
1418 r = touch("/run/systemd/journal/flushed");
1419 if (r < 0)
1420 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1421
18e758bf 1422 server_space_usage_message(s, NULL);
f9a810be
LP
1423 return 0;
1424}
d025f1e4 1425
f9a810be
LP
1426static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1427 Server *s = userdata;
33d52ab9 1428 int r;
d025f1e4 1429
f9a810be 1430 assert(s);
d025f1e4 1431
94b65516 1432 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1433 server_rotate(s);
3a19f215
FB
1434 server_vacuum(s, true);
1435
1436 if (s->system_journal)
1437 patch_min_use(&s->system_storage);
1438 if (s->runtime_journal)
1439 patch_min_use(&s->runtime_storage);
d025f1e4 1440
dbd6e31c 1441 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1442 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1443 if (r < 0)
1444 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1445
f9a810be
LP
1446 return 0;
1447}
d025f1e4 1448
f9a810be
LP
1449static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1450 Server *s = userdata;
d025f1e4 1451
f9a810be 1452 assert(s);
d025f1e4 1453
4daf54a8 1454 log_received_signal(LOG_INFO, si);
d025f1e4 1455
6203e07a 1456 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1457 return 0;
1458}
1459
94b65516
LP
1460static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1461 Server *s = userdata;
33d52ab9 1462 int r;
94b65516
LP
1463
1464 assert(s);
1465
1466 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1467
1468 server_sync(s);
1469
1470 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1471 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1472 if (r < 0)
1473 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1474
1475 return 0;
1476}
1477
f9a810be 1478static int setup_signals(Server *s) {
f9a810be 1479 int r;
d025f1e4
ZJS
1480
1481 assert(s);
1482
9bab3b65 1483 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1484
151b9b96 1485 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1486 if (r < 0)
1487 return r;
1488
151b9b96 1489 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1490 if (r < 0)
1491 return r;
d025f1e4 1492
151b9b96 1493 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1494 if (r < 0)
1495 return r;
d025f1e4 1496
b374689c
LP
1497 /* Let's process SIGTERM late, so that we flush all queued
1498 * messages to disk before we exit */
1499 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1500 if (r < 0)
1501 return r;
1502
1503 /* When journald is invoked on the terminal (when debugging),
1504 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1505 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1506 if (r < 0)
1507 return r;
d025f1e4 1508
b374689c
LP
1509 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1510 if (r < 0)
1511 return r;
1512
94b65516
LP
1513 /* SIGRTMIN+1 causes an immediate sync. We process this very
1514 * late, so that everything else queued at this point is
1515 * really written to disk. Clients can watch
1516 * /run/systemd/journal/synced with inotify until its mtime
1517 * changes to see when a sync happened. */
1518 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1519 if (r < 0)
1520 return r;
1521
1522 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1523 if (r < 0)
1524 return r;
1525
d025f1e4
ZJS
1526 return 0;
1527}
1528
5707ecf3
ZJS
1529static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1530 Server *s = data;
74df0fca 1531 int r;
d025f1e4 1532
5707ecf3 1533 assert(s);
d025f1e4 1534
5707ecf3
ZJS
1535 if (streq(key, "systemd.journald.forward_to_syslog")) {
1536 r = value ? parse_boolean(value) : true;
d581d9d9 1537 if (r < 0)
5707ecf3
ZJS
1538 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1539 else
1540 s->forward_to_syslog = r;
1541 } else if (streq(key, "systemd.journald.forward_to_kmsg")) {
1542 r = value ? parse_boolean(value) : true;
1543 if (r < 0)
1544 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1545 else
1546 s->forward_to_kmsg = r;
1547 } else if (streq(key, "systemd.journald.forward_to_console")) {
1548 r = value ? parse_boolean(value) : true;
1549 if (r < 0)
1550 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1551 else
1552 s->forward_to_console = r;
1553 } else if (streq(key, "systemd.journald.forward_to_wall")) {
1554 r = value ? parse_boolean(value) : true;
1555 if (r < 0)
1556 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1557 else
1558 s->forward_to_wall = r;
1559 } else if (streq(key, "systemd.journald.max_level_console") && value) {
1560 r = log_level_from_string(value);
1561 if (r < 0)
1562 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1563 else
1564 s->max_level_console = r;
1565 } else if (streq(key, "systemd.journald.max_level_store") && value) {
1566 r = log_level_from_string(value);
1567 if (r < 0)
1568 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1569 else
1570 s->max_level_store = r;
1571 } else if (streq(key, "systemd.journald.max_level_syslog") && value) {
1572 r = log_level_from_string(value);
1573 if (r < 0)
1574 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1575 else
1576 s->max_level_syslog = r;
1577 } else if (streq(key, "systemd.journald.max_level_kmsg") && value) {
1578 r = log_level_from_string(value);
1579 if (r < 0)
1580 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1581 else
1582 s->max_level_kmsg = r;
1583 } else if (streq(key, "systemd.journald.max_level_wall") && value) {
1584 r = log_level_from_string(value);
1585 if (r < 0)
1586 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1587 else
1588 s->max_level_wall = r;
1589 } else if (startswith(key, "systemd.journald"))
1590 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1591
804ae586 1592 /* do not warn about state here, since probably systemd already did */
db91ea32 1593 return 0;
d025f1e4
ZJS
1594}
1595
1596static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1597 assert(s);
1598
43688c49 1599 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1600 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1601 "Journal\0",
1602 config_item_perf_lookup, journald_gperf_lookup,
1603 false, s);
d025f1e4
ZJS
1604}
1605
f9a810be
LP
1606static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1607 Server *s = userdata;
26687bf8
OS
1608
1609 assert(s);
1610
f9a810be 1611 server_sync(s);
26687bf8
OS
1612 return 0;
1613}
1614
d07f7b9e 1615int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1616 int r;
1617
26687bf8
OS
1618 assert(s);
1619
d07f7b9e
LP
1620 if (priority <= LOG_CRIT) {
1621 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1622 server_sync(s);
1623 return 0;
1624 }
1625
26687bf8
OS
1626 if (s->sync_scheduled)
1627 return 0;
1628
f9a810be
LP
1629 if (s->sync_interval_usec > 0) {
1630 usec_t when;
ca267016 1631
6a0f1f6d 1632 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1633 if (r < 0)
1634 return r;
26687bf8 1635
f9a810be
LP
1636 when += s->sync_interval_usec;
1637
1638 if (!s->sync_event_source) {
6a0f1f6d
LP
1639 r = sd_event_add_time(
1640 s->event,
1641 &s->sync_event_source,
1642 CLOCK_MONOTONIC,
1643 when, 0,
1644 server_dispatch_sync, s);
f9a810be
LP
1645 if (r < 0)
1646 return r;
1647
1648 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1649 } else {
1650 r = sd_event_source_set_time(s->sync_event_source, when);
1651 if (r < 0)
1652 return r;
1653
1654 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1655 }
26687bf8 1656 if (r < 0)
f9a810be 1657 return r;
26687bf8 1658
f9a810be
LP
1659 s->sync_scheduled = true;
1660 }
26687bf8
OS
1661
1662 return 0;
1663}
1664
0c24bb23
LP
1665static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1666 Server *s = userdata;
1667
1668 assert(s);
1669
1670 server_cache_hostname(s);
1671 return 0;
1672}
1673
1674static int server_open_hostname(Server *s) {
1675 int r;
1676
1677 assert(s);
1678
1679 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1680 if (s->hostname_fd < 0)
1681 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1682
151b9b96 1683 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1684 if (r < 0) {
28def94c
DR
1685 /* kernels prior to 3.2 don't support polling this file. Ignore
1686 * the failure. */
1687 if (r == -EPERM) {
e53fc357 1688 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1689 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1690 return 0;
1691 }
1692
23bbb0de 1693 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1694 }
1695
1696 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1697 if (r < 0)
1698 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1699
1700 return 0;
1701}
1702
e22aa3d3
LP
1703static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1704 Server *s = userdata;
1705 int r;
1706
1707 assert(s);
1708 assert(s->notify_event_source == es);
1709 assert(s->notify_fd == fd);
1710
e22aa3d3 1711 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1712 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1713 * READY=1 event or an stdout stream event. If there's nothing
1714 * to write anymore, turn our event source off. The next time
1715 * there's something to send it will be turned on again. */
e22aa3d3
LP
1716
1717 if (!s->sent_notify_ready) {
1718 static const char p[] =
1719 "READY=1\n"
1720 "STATUS=Processing requests...";
1721 ssize_t l;
1722
1723 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1724 if (l < 0) {
1725 if (errno == EAGAIN)
1726 return 0;
1727
1728 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1729 }
1730
1731 s->sent_notify_ready = true;
1732 log_debug("Sent READY=1 notification.");
1733
119e9655
LP
1734 } else if (s->send_watchdog) {
1735
1736 static const char p[] =
1737 "WATCHDOG=1";
1738
1739 ssize_t l;
1740
1741 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1742 if (l < 0) {
1743 if (errno == EAGAIN)
1744 return 0;
1745
1746 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1747 }
1748
1749 s->send_watchdog = false;
1750 log_debug("Sent WATCHDOG=1 notification.");
1751
e22aa3d3
LP
1752 } else if (s->stdout_streams_notify_queue)
1753 /* Dispatch one stream notification event */
1754 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1755
61233823 1756 /* Leave us enabled if there's still more to do. */
119e9655 1757 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1758 return 0;
1759
1760 /* There was nothing to do anymore, let's turn ourselves off. */
1761 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1762 if (r < 0)
1763 return log_error_errno(r, "Failed to turn off notify event source: %m");
1764
1765 return 0;
1766}
1767
119e9655
LP
1768static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1769 Server *s = userdata;
1770 int r;
1771
1772 assert(s);
1773
1774 s->send_watchdog = true;
1775
1776 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1777 if (r < 0)
1778 log_warning_errno(r, "Failed to turn on notify event source: %m");
1779
1780 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1781 if (r < 0)
1782 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1783
1784 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1785 if (r < 0)
1786 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1787
1788 return 0;
1789}
1790
e22aa3d3
LP
1791static int server_connect_notify(Server *s) {
1792 union sockaddr_union sa = {
1793 .un.sun_family = AF_UNIX,
1794 };
1795 const char *e;
1796 int r;
1797
1798 assert(s);
1799 assert(s->notify_fd < 0);
1800 assert(!s->notify_event_source);
1801
1802 /*
1803 So here's the problem: we'd like to send notification
1804 messages to PID 1, but we cannot do that via sd_notify(),
1805 since that's synchronous, and we might end up blocking on
1806 it. Specifically: given that PID 1 might block on
1807 dbus-daemon during IPC, and dbus-daemon is logging to us,
1808 and might hence block on us, we might end up in a deadlock
ccddd104 1809 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1810 generating a full blocking circle. To avoid this, let's
1811 create a non-blocking socket, and connect it to the
1812 notification socket, and then wait for POLLOUT before we
1813 send anything. This should efficiently avoid any deadlocks,
1814 as we'll never block on PID 1, hence PID 1 can safely block
1815 on dbus-daemon which can safely block on us again.
1816
1817 Don't think that this issue is real? It is, see:
1818 https://github.com/systemd/systemd/issues/1505
1819 */
1820
1821 e = getenv("NOTIFY_SOCKET");
1822 if (!e)
1823 return 0;
1824
1825 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1826 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1827 return -EINVAL;
1828 }
1829
1830 if (strlen(e) > sizeof(sa.un.sun_path)) {
1831 log_error("NOTIFY_SOCKET path too long: %s", e);
1832 return -EINVAL;
1833 }
1834
1835 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1836 if (s->notify_fd < 0)
1837 return log_error_errno(errno, "Failed to create notify socket: %m");
1838
1839 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1840
1841 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1842 if (sa.un.sun_path[0] == '@')
1843 sa.un.sun_path[0] = 0;
1844
fc2fffe7 1845 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1846 if (r < 0)
1847 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1848
1849 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1850 if (r < 0)
1851 return log_error_errno(r, "Failed to watch notification socket: %m");
1852
119e9655
LP
1853 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1854 s->send_watchdog = true;
1855
4de2402b 1856 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1857 if (r < 0)
1858 return log_error_errno(r, "Failed to add watchdog time event: %m");
1859 }
1860
e22aa3d3
LP
1861 /* This should fire pretty soon, which we'll use to send the
1862 * READY=1 event. */
1863
1864 return 0;
1865}
1866
d025f1e4 1867int server_init(Server *s) {
13790add 1868 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1869 int n, r, fd;
7d18d348 1870 bool no_sockets;
d025f1e4
ZJS
1871
1872 assert(s);
1873
1874 zero(*s);
e22aa3d3 1875 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1876 s->compress = true;
1877 s->seal = true;
1878
119e9655
LP
1879 s->watchdog_usec = USEC_INFINITY;
1880
26687bf8
OS
1881 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1882 s->sync_scheduled = false;
1883
d025f1e4
ZJS
1884 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1885 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1886
40b71e89 1887 s->forward_to_wall = true;
d025f1e4 1888
e150e820
MB
1889 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1890
d025f1e4
ZJS
1891 s->max_level_store = LOG_DEBUG;
1892 s->max_level_syslog = LOG_DEBUG;
1893 s->max_level_kmsg = LOG_NOTICE;
1894 s->max_level_console = LOG_INFO;
40b71e89 1895 s->max_level_wall = LOG_EMERG;
d025f1e4 1896
266a4700
FB
1897 journal_reset_metrics(&s->system_storage.metrics);
1898 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1899
1900 server_parse_config_file(s);
d7f69e16 1901 parse_proc_cmdline(parse_proc_cmdline_item, s, true);
8580d1f7 1902
d288f79f 1903 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1904 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1905 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1906 s->rate_limit_interval = s->rate_limit_burst = 0;
1907 }
d025f1e4 1908
8580d1f7 1909 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1910
43cf8388 1911 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1912 if (!s->user_journals)
1913 return log_oom();
1914
1915 s->mmap = mmap_cache_new();
1916 if (!s->mmap)
1917 return log_oom();
1918
b58c888f
VC
1919 s->deferred_closes = set_new(NULL);
1920 if (!s->deferred_closes)
1921 return log_oom();
1922
f9a810be 1923 r = sd_event_default(&s->event);
23bbb0de
MS
1924 if (r < 0)
1925 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1926
1927 n = sd_listen_fds(true);
23bbb0de
MS
1928 if (n < 0)
1929 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1930
1931 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1932
1933 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1934
1935 if (s->native_fd >= 0) {
1936 log_error("Too many native sockets passed.");
1937 return -EINVAL;
1938 }
1939
1940 s->native_fd = fd;
1941
1942 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1943
1944 if (s->stdout_fd >= 0) {
1945 log_error("Too many stdout sockets passed.");
1946 return -EINVAL;
1947 }
1948
1949 s->stdout_fd = fd;
1950
03ee5c38
LP
1951 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1952 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1953
1954 if (s->syslog_fd >= 0) {
1955 log_error("Too many /dev/log sockets passed.");
1956 return -EINVAL;
1957 }
1958
1959 s->syslog_fd = fd;
1960
875c2e22
LP
1961 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1962
1963 if (s->audit_fd >= 0) {
1964 log_error("Too many audit sockets passed.");
1965 return -EINVAL;
1966 }
1967
1968 s->audit_fd = fd;
1969
4ec3cd73 1970 } else {
4ec3cd73 1971
13790add
LP
1972 if (!fds) {
1973 fds = fdset_new();
1974 if (!fds)
1975 return log_oom();
1976 }
4ec3cd73 1977
13790add
LP
1978 r = fdset_put(fds, fd);
1979 if (r < 0)
1980 return log_oom();
4ec3cd73 1981 }
d025f1e4
ZJS
1982 }
1983
15d91bff
ZJS
1984 /* Try to restore streams, but don't bother if this fails */
1985 (void) server_restore_streams(s, fds);
d025f1e4 1986
13790add
LP
1987 if (fdset_size(fds) > 0) {
1988 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1989 fds = fdset_free(fds);
1990 }
1991
7d18d348
ZJS
1992 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1993
1994 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1995
1996 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1997 r = server_open_stdout_socket(s);
1998 if (r < 0)
1999 return r;
2000
37b7affe 2001 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 2002 r = server_open_syslog_socket(s);
d025f1e4
ZJS
2003 if (r < 0)
2004 return r;
2005
37b7affe 2006 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 2007 r = server_open_native_socket(s);
d025f1e4
ZJS
2008 if (r < 0)
2009 return r;
2010
37b7affe 2011 /* /dev/ksmg */
d025f1e4
ZJS
2012 r = server_open_dev_kmsg(s);
2013 if (r < 0)
2014 return r;
2015
7d18d348
ZJS
2016 /* Unless we got *some* sockets and not audit, open audit socket */
2017 if (s->audit_fd >= 0 || no_sockets) {
2018 r = server_open_audit(s);
2019 if (r < 0)
2020 return r;
2021 }
875c2e22 2022
d025f1e4
ZJS
2023 r = server_open_kernel_seqnum(s);
2024 if (r < 0)
2025 return r;
2026
0c24bb23
LP
2027 r = server_open_hostname(s);
2028 if (r < 0)
2029 return r;
2030
f9a810be 2031 r = setup_signals(s);
d025f1e4
ZJS
2032 if (r < 0)
2033 return r;
2034
2035 s->udev = udev_new();
2036 if (!s->udev)
2037 return -ENOMEM;
2038
f9a810be 2039 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2040 if (!s->rate_limit)
2041 return -ENOMEM;
2042
e9174f29
LP
2043 r = cg_get_root_path(&s->cgroup_root);
2044 if (r < 0)
2045 return r;
2046
0c24bb23
LP
2047 server_cache_hostname(s);
2048 server_cache_boot_id(s);
2049 server_cache_machine_id(s);
2050
266a4700
FB
2051 s->runtime_storage.name = "Runtime journal";
2052 s->system_storage.name = "System journal";
2053
605405c6
ZJS
2054 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2055 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
2056 if (!s->runtime_storage.path || !s->system_storage.path)
2057 return -ENOMEM;
2058
e22aa3d3
LP
2059 (void) server_connect_notify(s);
2060
804ae586 2061 return system_journal_open(s, false);
d025f1e4
ZJS
2062}
2063
2064void server_maybe_append_tags(Server *s) {
2065#ifdef HAVE_GCRYPT
2066 JournalFile *f;
2067 Iterator i;
2068 usec_t n;
2069
2070 n = now(CLOCK_REALTIME);
2071
2072 if (s->system_journal)
2073 journal_file_maybe_append_tag(s->system_journal, n);
2074
43cf8388 2075 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2076 journal_file_maybe_append_tag(f, n);
2077#endif
2078}
2079
2080void server_done(Server *s) {
2081 JournalFile *f;
2082 assert(s);
2083
b58c888f
VC
2084 if (s->deferred_closes) {
2085 journal_file_close_set(s->deferred_closes);
2086 set_free(s->deferred_closes);
2087 }
2088
d025f1e4
ZJS
2089 while (s->stdout_streams)
2090 stdout_stream_free(s->stdout_streams);
2091
2092 if (s->system_journal)
69a3a6fd 2093 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2094
2095 if (s->runtime_journal)
69a3a6fd 2096 (void) journal_file_close(s->runtime_journal);
d025f1e4 2097
43cf8388 2098 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2099 (void) journal_file_close(f);
d025f1e4 2100
43cf8388 2101 ordered_hashmap_free(s->user_journals);
d025f1e4 2102
f9a810be
LP
2103 sd_event_source_unref(s->syslog_event_source);
2104 sd_event_source_unref(s->native_event_source);
2105 sd_event_source_unref(s->stdout_event_source);
2106 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2107 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2108 sd_event_source_unref(s->sync_event_source);
2109 sd_event_source_unref(s->sigusr1_event_source);
2110 sd_event_source_unref(s->sigusr2_event_source);
2111 sd_event_source_unref(s->sigterm_event_source);
2112 sd_event_source_unref(s->sigint_event_source);
94b65516 2113 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2114 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2115 sd_event_source_unref(s->notify_event_source);
119e9655 2116 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2117 sd_event_unref(s->event);
d025f1e4 2118
03e334a1
LP
2119 safe_close(s->syslog_fd);
2120 safe_close(s->native_fd);
2121 safe_close(s->stdout_fd);
2122 safe_close(s->dev_kmsg_fd);
875c2e22 2123 safe_close(s->audit_fd);
03e334a1 2124 safe_close(s->hostname_fd);
e22aa3d3 2125 safe_close(s->notify_fd);
0c24bb23 2126
d025f1e4
ZJS
2127 if (s->rate_limit)
2128 journal_rate_limit_free(s->rate_limit);
2129
2130 if (s->kernel_seqnum)
2131 munmap(s->kernel_seqnum, sizeof(uint64_t));
2132
2133 free(s->buffer);
2134 free(s->tty_path);
e9174f29 2135 free(s->cgroup_root);
99d0966e 2136 free(s->hostname_field);
d025f1e4
ZJS
2137
2138 if (s->mmap)
2139 mmap_cache_unref(s->mmap);
2140
3e044c49 2141 udev_unref(s->udev);
d025f1e4 2142}
8580d1f7
LP
2143
2144static const char* const storage_table[_STORAGE_MAX] = {
2145 [STORAGE_AUTO] = "auto",
2146 [STORAGE_VOLATILE] = "volatile",
2147 [STORAGE_PERSISTENT] = "persistent",
2148 [STORAGE_NONE] = "none"
2149};
2150
2151DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2152DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2153
2154static const char* const split_mode_table[_SPLIT_MAX] = {
2155 [SPLIT_LOGIN] = "login",
2156 [SPLIT_UID] = "uid",
2157 [SPLIT_NONE] = "none",
2158};
2159
2160DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2161DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");