]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journal: introduce cache_space_invalidate()
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
d025f1e4 74
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at once; when it is reached,
 * find_journal() closes the oldest entry before opening another one. */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached space measurement is reused before the directory is scanned again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88
e0ed6db9
FB
89static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
90 _cleanup_closedir_ DIR *d = NULL;
91 struct dirent *de;
92 struct statvfs ss;
e0ed6db9
FB
93
94 assert(ret_used);
95 assert(ret_free);
96
266a4700 97 d = opendir(path);
e0ed6db9
FB
98 if (!d)
99 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 100 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
101
102 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 103 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
104
105 *ret_free = ss.f_bsize * ss.f_bavail;
106 *ret_used = 0;
107 FOREACH_DIRENT_ALL(de, d, break) {
108 struct stat st;
109
110 if (!endswith(de->d_name, ".journal") &&
111 !endswith(de->d_name, ".journal~"))
112 continue;
113
114 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 115 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
116 continue;
117 }
118
119 if (!S_ISREG(st.st_mode))
120 continue;
121
122 *ret_used += (uint64_t) st.st_blocks * 512UL;
123 }
124
125 return 0;
126}
127
a0edc477
FB
128static void cache_space_invalidate(JournalStorageSpace *space) {
129 memset(space, 0, sizeof(*space));
130}
131
8580d1f7
LP
132static int determine_space_for(
133 Server *s,
266a4700 134 JournalStorage *storage,
8580d1f7
LP
135 bool patch_min_use,
136 uint64_t *available,
137 uint64_t *limit) {
138
7fd1b19b 139 _cleanup_closedir_ DIR *d = NULL;
23aba343 140 JournalStorageSpace *space;
266a4700 141 JournalMetrics *metrics;
23aba343 142 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 143 usec_t ts;
e0ed6db9 144 int r;
d025f1e4 145
8580d1f7 146 assert(s);
266a4700 147
266a4700 148 metrics = &storage->metrics;
23aba343 149 space = &storage->space;
d025f1e4 150
8580d1f7 151 ts = now(CLOCK_MONOTONIC);
d025f1e4 152
23aba343 153 if (space->timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 154
8580d1f7 155 if (available)
23aba343 156 *available = space->available;
8580d1f7 157 if (limit)
23aba343 158 *limit = space->limit;
d025f1e4 159
d025f1e4 160 return 0;
8580d1f7 161 }
d025f1e4 162
23aba343 163 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
164 if (r < 0)
165 return r;
d025f1e4 166
23aba343
FB
167 space->vfs_used = vfs_used;
168 space->vfs_available = vfs_avail;
169
170 avail = LESS_BY(vfs_avail, metrics->keep_free);
171
8a03c9ef 172 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
173 * current usage on disk. We do this when starting up and
174 * first opening the journal files. This way sudden spikes in
175 * disk usage will not cause journald to vacuum files without
176 * bounds. Note that this means that only a restart of
177 * journald will make it reset this value. */
d025f1e4 178
8580d1f7 179 if (patch_min_use)
23aba343 180 metrics->min_use = MAX(metrics->min_use, vfs_used);
348ced90 181
23aba343
FB
182 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
183 space->available = LESS_BY(space->limit, vfs_used);
184 space->timestamp = ts;
d025f1e4 185
8580d1f7 186 if (available)
23aba343 187 *available = space->available;
8580d1f7 188 if (limit)
23aba343 189 *limit = space->limit;
8580d1f7
LP
190
191 return 1;
192}
193
18e758bf 194static int determine_space(Server *s, bool patch_min_use, uint64_t *available, uint64_t *limit) {
266a4700 195 JournalStorage *js;
8580d1f7
LP
196
197 assert(s);
198
266a4700 199 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
18e758bf 200 return determine_space_for(s, js, patch_min_use, available, limit);
d025f1e4
ZJS
201}
202
cba5629e
FB
203void server_space_usage_message(Server *s, JournalStorage *storage) {
204 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
205 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
206 JournalMetrics *metrics;
cba5629e
FB
207
208 assert(s);
209
210 if (!storage)
211 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
212
23aba343 213 if (determine_space_for(s, storage, false, NULL, NULL) < 0)
cba5629e
FB
214 return;
215
216 metrics = &storage->metrics;
23aba343 217 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
218 format_bytes(fb2, sizeof(fb2), metrics->max_use);
219 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 220 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
221 format_bytes(fb5, sizeof(fb5), storage->space.limit);
222 format_bytes(fb6, sizeof(fb6), storage->space.available);
223
224 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage->name, storage->path, fb1, fb5, fb6),
227 "JOURNAL_NAME=%s", storage->name,
228 "JOURNAL_PATH=%s", storage->path,
23aba343 229 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
230 "CURRENT_USE_PRETTY=%s", fb1,
231 "MAX_USE=%"PRIu64, metrics->max_use,
232 "MAX_USE_PRETTY=%s", fb2,
233 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 235 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
236 "DISK_AVAILABLE_PRETTY=%s", fb4,
237 "LIMIT=%"PRIu64, storage->space.limit,
238 "LIMIT_PRETTY=%s", fb5,
239 "AVAILABLE=%"PRIu64, storage->space.available,
240 "AVAILABLE_PRETTY=%s", fb6,
241 NULL);
242}
243
5c3bde3f 244static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 245#ifdef HAVE_ACL
5c3bde3f 246 int r;
d025f1e4 247#endif
d025f1e4
ZJS
248 assert(f);
249
d025f1e4 250#ifdef HAVE_ACL
34c10968 251 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
252 return;
253
5c3bde3f
ZJS
254 r = add_acls_for_user(f->fd, uid);
255 if (r < 0)
256 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
257#endif
258}
259
7a24f3bf
VC
260static int open_journal(
261 Server *s,
262 bool reliably,
263 const char *fname,
264 int flags,
265 bool seal,
266 JournalMetrics *metrics,
7a24f3bf
VC
267 JournalFile **ret) {
268 int r;
e167d7fd 269 JournalFile *f;
7a24f3bf
VC
270
271 assert(s);
272 assert(fname);
273 assert(ret);
274
275 if (reliably)
b58c888f 276 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 277 else
5d1ce257 278 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
279 if (r < 0)
280 return r;
281
e167d7fd 282 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 283 if (r < 0) {
69a3a6fd 284 (void) journal_file_close(f);
7a24f3bf
VC
285 return r;
286 }
287
e167d7fd 288 *ret = f;
7a24f3bf
VC
289 return r;
290}
291
6431c7e2
VC
/* Returns true if the flag file /run/systemd/journal/flushed exists, false otherwise (including
 * when its existence cannot be checked). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
295
105bdb46 296static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 297 bool flushed = false;
105bdb46
VC
298 const char *fn;
299 int r = 0;
300
301 if (!s->system_journal &&
302 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 303 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
304
305 /* If in auto mode: first try to create the machine
306 * path, but not the prefix.
307 *
308 * If in persistent mode: create /var/log/journal and
309 * the machine path */
310
311 if (s->storage == STORAGE_PERSISTENT)
312 (void) mkdir_p("/var/log/journal/", 0755);
313
266a4700 314 (void) mkdir(s->system_storage.path, 0755);
105bdb46 315
266a4700
FB
316 fn = strjoina(s->system_storage.path, "/system.journal");
317 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
318 if (r >= 0) {
319 server_add_acls(s->system_journal, 0);
18e758bf 320 (void) determine_space_for(s, &s->system_storage, true, NULL, NULL);
105bdb46
VC
321 } else if (r < 0) {
322 if (r != -ENOENT && r != -EROFS)
323 log_warning_errno(r, "Failed to open system journal: %m");
324
325 r = 0;
326 }
929eeb54
VC
327
328 /* If the runtime journal is open, and we're post-flush, we're
329 * recovering from a failed system journal rotate (ENOSPC)
330 * for which the runtime journal was reopened.
331 *
332 * Perform an implicit flush to var, leaving the runtime
333 * journal closed, now that the system journal is back.
334 */
335 if (s->runtime_journal && flushed)
336 (void) server_flush_to_var(s);
105bdb46
VC
337 }
338
339 if (!s->runtime_journal &&
340 (s->storage != STORAGE_NONE)) {
341
266a4700 342 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
343
344 if (s->system_journal) {
345
346 /* Try to open the runtime journal, but only
347 * if it already exists, so that we can flush
348 * it into the system journal */
349
266a4700 350 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
351 if (r < 0) {
352 if (r != -ENOENT)
353 log_warning_errno(r, "Failed to open runtime journal: %m");
354
355 r = 0;
356 }
357
358 } else {
359
360 /* OK, we really need the runtime journal, so create
361 * it if necessary. */
362
363 (void) mkdir("/run/log", 0755);
364 (void) mkdir("/run/log/journal", 0755);
365 (void) mkdir_parents(fn, 0750);
366
266a4700 367 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
368 if (r < 0)
369 return log_error_errno(r, "Failed to open runtime journal: %m");
370 }
371
372 if (s->runtime_journal) {
373 server_add_acls(s->runtime_journal, 0);
18e758bf 374 (void) determine_space_for(s, &s->runtime_storage, true, NULL, NULL);
105bdb46
VC
375 }
376 }
377
378 return r;
379}
380
d025f1e4 381static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 382 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
383 int r;
384 JournalFile *f;
385 sd_id128_t machine;
386
387 assert(s);
388
105bdb46
VC
389 /* A rotate that fails to create the new journal (ENOSPC) leaves the
390 * rotated journal as NULL. Unless we revisit opening, even after
391 * space is made available we'll continue to return NULL indefinitely.
392 *
393 * system_journal_open() is a noop if the journals are already open, so
394 * we can just call it here to recover from failed rotates (or anything
395 * else that's left the journals as NULL).
396 *
397 * Fixes https://github.com/systemd/systemd/issues/3968 */
398 (void) system_journal_open(s, false);
399
d025f1e4
ZJS
400 /* We split up user logs only on /var, not on /run. If the
401 * runtime file is open, we write to it exclusively, in order
402 * to guarantee proper order as soon as we flush /run to
403 * /var and close the runtime file. */
404
405 if (s->runtime_journal)
406 return s->runtime_journal;
407
61755fda 408 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
409 return s->system_journal;
410
411 r = sd_id128_get_machine(&machine);
412 if (r < 0)
413 return s->system_journal;
414
4a0b58c4 415 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
416 if (f)
417 return f;
418
de0671ee
ZJS
419 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
420 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
421 return s->system_journal;
422
43cf8388 423 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 424 /* Too many open? Then let's close one */
43cf8388 425 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 426 assert(f);
69a3a6fd 427 (void) journal_file_close(f);
d025f1e4
ZJS
428 }
429
266a4700 430 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
431 if (r < 0)
432 return s->system_journal;
433
5c3bde3f 434 server_add_acls(f, uid);
d025f1e4 435
4a0b58c4 436 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 437 if (r < 0) {
69a3a6fd 438 (void) journal_file_close(f);
d025f1e4
ZJS
439 return s->system_journal;
440 }
441
442 return f;
443}
444
ea69bd41
LP
445static int do_rotate(
446 Server *s,
447 JournalFile **f,
448 const char* name,
449 bool seal,
450 uint32_t uid) {
451
fc55baee
ZJS
452 int r;
453 assert(s);
454
455 if (!*f)
456 return -EINVAL;
457
b58c888f 458 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
459 if (r < 0)
460 if (*f)
ea69bd41 461 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 462 else
ea69bd41 463 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 464 else
5c3bde3f 465 server_add_acls(*f, uid);
2678031a 466
fc55baee
ZJS
467 return r;
468}
469
d025f1e4
ZJS
470void server_rotate(Server *s) {
471 JournalFile *f;
472 void *k;
473 Iterator i;
474 int r;
475
476 log_debug("Rotating...");
477
8580d1f7
LP
478 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
479 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 480
43cf8388 481 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 482 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 483 if (r >= 0)
43cf8388 484 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
485 else if (!f)
486 /* Old file has been closed and deallocated */
43cf8388 487 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 488 }
b58c888f
VC
489
490 /* Perform any deferred closes which aren't still offlining. */
491 SET_FOREACH(f, s->deferred_closes, i)
492 if (!journal_file_is_offlining(f)) {
493 (void) set_remove(s->deferred_closes, f);
494 (void) journal_file_close(f);
495 }
d025f1e4
ZJS
496}
497
26687bf8
OS
498void server_sync(Server *s) {
499 JournalFile *f;
26687bf8
OS
500 Iterator i;
501 int r;
502
26687bf8 503 if (s->system_journal) {
ac2e41f5 504 r = journal_file_set_offline(s->system_journal, false);
26687bf8 505 if (r < 0)
65089b82 506 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
507 }
508
65c1d46b 509 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 510 r = journal_file_set_offline(f, false);
26687bf8 511 if (r < 0)
65089b82 512 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
513 }
514
f9a810be
LP
515 if (s->sync_event_source) {
516 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
517 if (r < 0)
da927ba9 518 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 519 }
26687bf8
OS
520
521 s->sync_scheduled = false;
522}
523
ea69bd41
LP
524static void do_vacuum(
525 Server *s,
266a4700 526 JournalStorage *storage,
8580d1f7
LP
527 bool verbose,
528 bool patch_min_use) {
ea69bd41 529
266a4700 530 JournalMetrics *metrics;
8580d1f7 531 uint64_t limit;
63c8666b
ZJS
532 int r;
533
8580d1f7 534 assert(s);
266a4700 535 assert(storage);
8580d1f7 536
266a4700 537 metrics = &storage->metrics;
8580d1f7 538 limit = metrics->max_use;
18e758bf
FB
539 (void) determine_space_for(s, storage, patch_min_use, NULL, &limit);
540
541 if (verbose)
542 server_space_usage_message(s, storage);
8580d1f7 543
266a4700 544 r = journal_directory_vacuum(storage->path, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 545 if (r < 0 && r != -ENOENT)
266a4700
FB
546 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
547
a0edc477 548 cache_space_invalidate(&storage->space);
63c8666b
ZJS
549}
550
8580d1f7
LP
551int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
552 assert(s);
d025f1e4
ZJS
553
554 log_debug("Vacuuming...");
555
556 s->oldest_file_usec = 0;
557
266a4700
FB
558 if (s->system_journal)
559 do_vacuum(s, &s->system_storage, verbose, patch_min_use);
560 if (s->runtime_journal)
561 do_vacuum(s, &s->runtime_storage, verbose, patch_min_use);
d025f1e4 562
8580d1f7 563 return 0;
d025f1e4
ZJS
564}
565
0c24bb23
LP
566static void server_cache_machine_id(Server *s) {
567 sd_id128_t id;
568 int r;
569
570 assert(s);
571
572 r = sd_id128_get_machine(&id);
573 if (r < 0)
574 return;
575
576 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
577}
578
579static void server_cache_boot_id(Server *s) {
580 sd_id128_t id;
581 int r;
582
583 assert(s);
584
585 r = sd_id128_get_boot(&id);
586 if (r < 0)
587 return;
588
589 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
590}
591
592static void server_cache_hostname(Server *s) {
593 _cleanup_free_ char *t = NULL;
594 char *x;
595
596 assert(s);
597
598 t = gethostname_malloc();
599 if (!t)
600 return;
601
602 x = strappend("_HOSTNAME=", t);
603 if (!x)
604 return;
605
606 free(s->hostname_field);
607 s->hostname_field = x;
608}
609
8531ae70 610static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 611 switch(r) {
ae739cc1 612
6e1045e5
ZJS
613 case -E2BIG: /* Hit configured limit */
614 case -EFBIG: /* Hit fs limit */
615 case -EDQUOT: /* Quota limit hit */
616 case -ENOSPC: /* Disk full */
d025f1e4 617 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 618 return true;
ae739cc1 619
6e1045e5
ZJS
620 case -EIO: /* I/O error of some kind (mmap) */
621 log_warning("%s: IO error, rotating.", f->path);
622 return true;
ae739cc1 623
6e1045e5 624 case -EHOSTDOWN: /* Other machine */
d025f1e4 625 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 626 return true;
ae739cc1 627
6e1045e5 628 case -EBUSY: /* Unclean shutdown */
d025f1e4 629 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 630 return true;
ae739cc1 631
6e1045e5 632 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 633 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 634 return true;
ae739cc1 635
6e1045e5
ZJS
636 case -EBADMSG: /* Corrupted */
637 case -ENODATA: /* Truncated */
638 case -ESHUTDOWN: /* Already archived */
d025f1e4 639 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 640 return true;
ae739cc1 641
6e1045e5 642 case -EIDRM: /* Journal file has been deleted */
2678031a 643 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 644 return true;
ae739cc1
LP
645
646 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 647 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
648 return true;
649
6e1045e5 650 default:
d025f1e4 651 return false;
6e1045e5 652 }
d025f1e4
ZJS
653}
654
d07f7b9e 655static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 656 bool vacuumed = false, rotate = false;
0f972d66 657 struct dual_timestamp ts;
d025f1e4 658 JournalFile *f;
d025f1e4
ZJS
659 int r;
660
661 assert(s);
662 assert(iovec);
663 assert(n > 0);
664
0f972d66
LP
665 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
666 * the source time, and not even the time the event was originally seen, but instead simply the time we started
667 * processing it, as we want strictly linear ordering in what we write out.) */
668 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
669 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
670
7c070017
LP
671 if (ts.realtime < s->last_realtime_clock) {
672 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
673 * regular operation. However, when it does happen, then we should make sure that we start fresh files
674 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
675 * bisection works correctly. */
d025f1e4 676
7c070017
LP
677 log_debug("Time jumped backwards, rotating.");
678 rotate = true;
679 } else {
680
681 f = find_journal(s, uid);
682 if (!f)
683 return;
684
685 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
686 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
687 rotate = true;
688 }
689 }
d025f1e4 690
7c070017 691 if (rotate) {
d025f1e4 692 server_rotate(s);
8580d1f7 693 server_vacuum(s, false, false);
d025f1e4
ZJS
694 vacuumed = true;
695
696 f = find_journal(s, uid);
697 if (!f)
698 return;
699 }
700
7c070017
LP
701 s->last_realtime_clock = ts.realtime;
702
0f972d66 703 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 704 if (r >= 0) {
d07f7b9e 705 server_schedule_sync(s, priority);
d025f1e4 706 return;
26687bf8 707 }
d025f1e4
ZJS
708
709 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 710 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
711 return;
712 }
713
714 server_rotate(s);
8580d1f7 715 server_vacuum(s, false, false);
d025f1e4
ZJS
716
717 f = find_journal(s, uid);
718 if (!f)
719 return;
720
721 log_debug("Retrying write.");
0f972d66 722 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
723 if (r < 0)
724 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
725 else
d07f7b9e 726 server_schedule_sync(s, priority);
d025f1e4
ZJS
727}
728
4b58153d
LP
729static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
730 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
731 char *copy, ids[SD_ID128_STRING_MAX];
732 int r;
733
734 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
735 * on the cgroup path. */
736
737 r = cg_slice_to_path(slice, &slice_path);
738 if (r < 0)
739 return r;
740
741 escaped = cg_escape(unit);
742 if (!escaped)
743 return -ENOMEM;
744
745 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
746 if (!p)
747 return -ENOMEM;
748
749 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
750 if (r < 0)
751 return r;
752 if (r != 32)
753 return -EINVAL;
754 ids[32] = 0;
755
756 if (!id128_is_valid(ids))
757 return -EINVAL;
758
759 copy = strdup(ids);
760 if (!copy)
761 return -ENOMEM;
762
763 *ret = copy;
764 return 0;
765}
766
d025f1e4
ZJS
767static void dispatch_message_real(
768 Server *s,
769 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
770 const struct ucred *ucred,
771 const struct timeval *tv,
d025f1e4 772 const char *label, size_t label_len,
968f3196 773 const char *unit_id,
d07f7b9e 774 int priority,
968f3196 775 pid_t object_pid) {
d025f1e4 776
968f3196 777 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
778 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
779 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
780 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 781 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
782 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
783 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
784 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
785 uid_t object_uid;
786 gid_t object_gid;
968f3196 787 char *x;
d025f1e4 788 int r;
ae018d9b 789 char *t, *c;
82499507
LP
790 uid_t realuid = 0, owner = 0, journal_uid;
791 bool owner_valid = false;
ae018d9b 792#ifdef HAVE_AUDIT
968f3196
ZJS
793 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
794 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
795 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
796 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
797
798 uint32_t audit;
799 uid_t loginuid;
800#endif
d025f1e4
ZJS
801
802 assert(s);
803 assert(iovec);
804 assert(n > 0);
d473176a 805 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
806
807 if (ucred) {
d025f1e4
ZJS
808 realuid = ucred->uid;
809
de0671ee 810 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 811 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 812
de0671ee 813 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 814 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 815
de0671ee 816 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 817 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
818
819 r = get_process_comm(ucred->pid, &t);
820 if (r >= 0) {
63c372cb 821 x = strjoina("_COMM=", t);
d025f1e4 822 free(t);
968f3196 823 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
824 }
825
826 r = get_process_exe(ucred->pid, &t);
827 if (r >= 0) {
63c372cb 828 x = strjoina("_EXE=", t);
d025f1e4 829 free(t);
968f3196 830 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
831 }
832
9bdbc2e2 833 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 834 if (r >= 0) {
63c372cb 835 x = strjoina("_CMDLINE=", t);
d025f1e4 836 free(t);
3a832116
SL
837 IOVEC_SET_STRING(iovec[n++], x);
838 }
839
840 r = get_process_capeff(ucred->pid, &t);
841 if (r >= 0) {
63c372cb 842 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 843 free(t);
968f3196 844 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
845 }
846
0a20e3c1 847#ifdef HAVE_AUDIT
d025f1e4 848 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 849 if (r >= 0) {
de0671ee 850 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
851 IOVEC_SET_STRING(iovec[n++], audit_session);
852 }
d025f1e4
ZJS
853
854 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 855 if (r >= 0) {
de0671ee 856 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 857 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 858 }
ae018d9b 859#endif
d025f1e4 860
e9174f29 861 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 862 if (r >= 0) {
4b58153d 863 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
864 char *session = NULL;
865
63c372cb 866 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 867 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 868
ae018d9b
LP
869 r = cg_path_get_session(c, &t);
870 if (r >= 0) {
63c372cb 871 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 872 free(t);
d025f1e4 873 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
874 }
875
876 if (cg_path_get_owner_uid(c, &owner) >= 0) {
877 owner_valid = true;
d025f1e4 878
de0671ee 879 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 880 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 881 }
d025f1e4 882
4b58153d
LP
883 if (cg_path_get_unit(c, &raw_unit) >= 0) {
884 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
885 IOVEC_SET_STRING(iovec[n++], x);
886 } else if (unit_id && !session) {
63c372cb 887 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
888 IOVEC_SET_STRING(iovec[n++], x);
889 }
890
891 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 892 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 893 free(t);
968f3196 894 IOVEC_SET_STRING(iovec[n++], x);
19cace37 895 } else if (unit_id && session) {
63c372cb 896 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
897 IOVEC_SET_STRING(iovec[n++], x);
898 }
ae018d9b 899
4b58153d
LP
900 if (cg_path_get_slice(c, &raw_slice) >= 0) {
901 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
902 IOVEC_SET_STRING(iovec[n++], x);
903 }
904
d473176a
LP
905 if (cg_path_get_user_slice(c, &t) >= 0) {
906 x = strjoina("_SYSTEMD_USER_SLICE=", t);
907 free(t);
908 IOVEC_SET_STRING(iovec[n++], x);
909 }
910
4b58153d
LP
911 if (raw_slice && raw_unit) {
912 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
913 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
914 free(t);
915 IOVEC_SET_STRING(iovec[n++], x);
916 }
917 }
918
ae018d9b 919 free(c);
2d43b190 920 } else if (unit_id) {
63c372cb 921 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 922 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 923 }
d025f1e4 924
d025f1e4 925#ifdef HAVE_SELINUX
6355e756 926 if (mac_selinux_have()) {
d682b3a7 927 if (label) {
f8294e41 928 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 929
d682b3a7
LP
930 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
931 IOVEC_SET_STRING(iovec[n++], x);
932 } else {
2ed96880 933 char *con;
d025f1e4 934
d682b3a7 935 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 936 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 937
d682b3a7
LP
938 freecon(con);
939 IOVEC_SET_STRING(iovec[n++], x);
940 }
d025f1e4
ZJS
941 }
942 }
943#endif
944 }
968f3196
ZJS
945 assert(n <= m);
946
947 if (object_pid) {
948 r = get_process_uid(object_pid, &object_uid);
949 if (r >= 0) {
de0671ee 950 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
951 IOVEC_SET_STRING(iovec[n++], o_uid);
952 }
953
954 r = get_process_gid(object_pid, &object_gid);
955 if (r >= 0) {
de0671ee 956 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
957 IOVEC_SET_STRING(iovec[n++], o_gid);
958 }
959
960 r = get_process_comm(object_pid, &t);
961 if (r >= 0) {
63c372cb 962 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
963 free(t);
964 IOVEC_SET_STRING(iovec[n++], x);
965 }
966
967 r = get_process_exe(object_pid, &t);
968 if (r >= 0) {
63c372cb 969 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
970 free(t);
971 IOVEC_SET_STRING(iovec[n++], x);
972 }
973
974 r = get_process_cmdline(object_pid, 0, false, &t);
975 if (r >= 0) {
63c372cb 976 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
977 free(t);
978 IOVEC_SET_STRING(iovec[n++], x);
979 }
980
981#ifdef HAVE_AUDIT
982 r = audit_session_from_pid(object_pid, &audit);
983 if (r >= 0) {
de0671ee 984 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
985 IOVEC_SET_STRING(iovec[n++], o_audit_session);
986 }
987
988 r = audit_loginuid_from_pid(object_pid, &loginuid);
989 if (r >= 0) {
de0671ee 990 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
991 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
992 }
993#endif
994
e9174f29 995 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 996 if (r >= 0) {
63c372cb 997 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
998 IOVEC_SET_STRING(iovec[n++], x);
999
1000 r = cg_path_get_session(c, &t);
1001 if (r >= 0) {
63c372cb 1002 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
1003 free(t);
1004 IOVEC_SET_STRING(iovec[n++], x);
1005 }
1006
1007 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 1008 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
1009 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1010 }
1011
1012 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1013 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1014 free(t);
19cace37
LP
1015 IOVEC_SET_STRING(iovec[n++], x);
1016 }
1017
1018 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1019 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1020 free(t);
968f3196 1021 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1022 }
968f3196 1023
d473176a
LP
1024 if (cg_path_get_slice(c, &t) >= 0) {
1025 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1026 free(t);
1027 IOVEC_SET_STRING(iovec[n++], x);
1028 }
1029
1030 if (cg_path_get_user_slice(c, &t) >= 0) {
1031 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1032 free(t);
1033 IOVEC_SET_STRING(iovec[n++], x);
1034 }
1035
968f3196
ZJS
1036 free(c);
1037 }
1038 }
1039 assert(n <= m);
d025f1e4
ZJS
1040
1041 if (tv) {
398a50cd 1042 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1043 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1044 }
1045
1046 /* Note that strictly speaking storing the boot id here is
1047 * redundant since the entry includes this in-line
1048 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1049 if (!isempty(s->boot_id_field))
1050 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1051
0c24bb23
LP
1052 if (!isempty(s->machine_id_field))
1053 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1054
0c24bb23
LP
1055 if (!isempty(s->hostname_field))
1056 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1057
1058 assert(n <= m);
1059
da499392 1060 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1061 /* Split up strictly by any UID */
759c945a 1062 journal_uid = realuid;
82499507 1063 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1064 /* Split up by login UIDs. We do this only if the
1065 * realuid is not root, in order not to accidentally
1066 * leak privileged information to the user that is
1067 * logged by a privileged process that is part of an
7517e174 1068 * unprivileged session. */
8a0889df 1069 journal_uid = owner;
da499392
KS
1070 else
1071 journal_uid = 0;
759c945a 1072
d07f7b9e 1073 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1074}
1075
1076void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1077 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1078 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1079 unsigned n = 0, m;
32917e33 1080 int r;
d025f1e4 1081 va_list ap;
b92bea5d 1082 struct ucred ucred = {};
d025f1e4
ZJS
1083
1084 assert(s);
1085 assert(format);
1086
4850d39a 1087 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1088 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1089 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1090
d025f1e4 1091 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1092 assert_cc(6 == LOG_INFO);
32917e33 1093 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1094
3bbaff3e 1095 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1096 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1097 IOVEC_SET_STRING(iovec[n++], mid);
1098 }
1099
8a03c9ef
ZJS
1100 m = n;
1101
1102 va_start(ap, format);
32917e33
ZJS
1103 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1104 /* Error handling below */
8a03c9ef
ZJS
1105 va_end(ap);
1106
d025f1e4
ZJS
1107 ucred.pid = getpid();
1108 ucred.uid = getuid();
1109 ucred.gid = getgid();
1110
32917e33
ZJS
1111 if (r >= 0)
1112 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1113
1114 while (m < n)
1115 free(iovec[m++].iov_base);
32917e33
ZJS
1116
1117 if (r < 0) {
1118 /* We failed to format the message. Emit a warning instead. */
1119 char buf[LINE_MAX];
1120
1121 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1122
1123 n = 3;
1124 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1125 IOVEC_SET_STRING(iovec[n++], buf);
1126 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1127 }
d025f1e4
ZJS
1128}
1129
1130void server_dispatch_message(
1131 Server *s,
1132 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1133 const struct ucred *ucred,
1134 const struct timeval *tv,
d025f1e4
ZJS
1135 const char *label, size_t label_len,
1136 const char *unit_id,
968f3196
ZJS
1137 int priority,
1138 pid_t object_pid) {
d025f1e4 1139
7027ff61 1140 int rl, r;
7fd1b19b 1141 _cleanup_free_ char *path = NULL;
8580d1f7 1142 uint64_t available = 0;
db91ea32 1143 char *c;
d025f1e4
ZJS
1144
1145 assert(s);
1146 assert(iovec || n == 0);
1147
1148 if (n == 0)
1149 return;
1150
1151 if (LOG_PRI(priority) > s->max_level_store)
1152 return;
1153
2f5df74a
HHPF
1154 /* Stop early in case the information will not be stored
1155 * in a journal. */
1156 if (s->storage == STORAGE_NONE)
1157 return;
1158
d025f1e4
ZJS
1159 if (!ucred)
1160 goto finish;
1161
e9174f29 1162 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1163 if (r < 0)
d025f1e4
ZJS
1164 goto finish;
1165
1166 /* example: /user/lennart/3/foobar
1167 * /system/dbus.service/foobar
1168 *
1169 * So let's cut of everything past the third /, since that is
1170 * where user directories start */
1171
1172 c = strchr(path, '/');
1173 if (c) {
1174 c = strchr(c+1, '/');
1175 if (c) {
1176 c = strchr(c+1, '/');
1177 if (c)
1178 *c = 0;
1179 }
1180 }
1181
18e758bf 1182 (void) determine_space(s, false, &available, NULL);
8580d1f7 1183 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1184 if (rl == 0)
d025f1e4 1185 return;
d025f1e4
ZJS
1186
1187 /* Write a suppression message if we suppressed something */
1188 if (rl > 1)
db91ea32 1189 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1190 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1191 NULL);
d025f1e4
ZJS
1192
1193finish:
d07f7b9e 1194 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1195}
1196
d025f1e4 1197int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1198 sd_id128_t machine;
1199 sd_journal *j = NULL;
fbb63411
LP
1200 char ts[FORMAT_TIMESPAN_MAX];
1201 usec_t start;
1202 unsigned n = 0;
1203 int r;
d025f1e4
ZJS
1204
1205 assert(s);
1206
1207 if (s->storage != STORAGE_AUTO &&
1208 s->storage != STORAGE_PERSISTENT)
1209 return 0;
1210
1211 if (!s->runtime_journal)
1212 return 0;
1213
8580d1f7 1214 (void) system_journal_open(s, true);
d025f1e4
ZJS
1215
1216 if (!s->system_journal)
1217 return 0;
1218
1219 log_debug("Flushing to /var...");
1220
fbb63411
LP
1221 start = now(CLOCK_MONOTONIC);
1222
d025f1e4 1223 r = sd_id128_get_machine(&machine);
00a16861 1224 if (r < 0)
d025f1e4 1225 return r;
d025f1e4
ZJS
1226
1227 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1228 if (r < 0)
1229 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1230
93b73b06
LP
1231 sd_journal_set_data_threshold(j, 0);
1232
d025f1e4
ZJS
1233 SD_JOURNAL_FOREACH(j) {
1234 Object *o = NULL;
1235 JournalFile *f;
1236
1237 f = j->current_file;
1238 assert(f && f->current_offset > 0);
1239
fbb63411
LP
1240 n++;
1241
d025f1e4
ZJS
1242 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1243 if (r < 0) {
da927ba9 1244 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1245 goto finish;
1246 }
1247
1248 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1249 if (r >= 0)
1250 continue;
1251
1252 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1253 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1254 goto finish;
1255 }
1256
1257 server_rotate(s);
8580d1f7 1258 server_vacuum(s, false, false);
d025f1e4 1259
253f59df
LP
1260 if (!s->system_journal) {
1261 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1262 r = -EIO;
1263 goto finish;
1264 }
1265
d025f1e4
ZJS
1266 log_debug("Retrying write.");
1267 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1268 if (r < 0) {
da927ba9 1269 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1270 goto finish;
1271 }
1272 }
1273
804ae586
LP
1274 r = 0;
1275
d025f1e4
ZJS
1276finish:
1277 journal_file_post_change(s->system_journal);
1278
804ae586 1279 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1280
1281 if (r >= 0)
c6878637 1282 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1283
763c7aa2 1284 sd_journal_close(j);
d025f1e4 1285
8a03c9ef
ZJS
1286 server_driver_message(s, SD_ID128_NULL,
1287 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1288 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1289 n),
1290 NULL);
fbb63411 1291
d025f1e4
ZJS
1292 return r;
1293}
1294
8531ae70 1295int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1296 Server *s = userdata;
a315ac4e
LP
1297 struct ucred *ucred = NULL;
1298 struct timeval *tv = NULL;
1299 struct cmsghdr *cmsg;
1300 char *label = NULL;
1301 size_t label_len = 0, m;
1302 struct iovec iovec;
1303 ssize_t n;
1304 int *fds = NULL, v = 0;
1305 unsigned n_fds = 0;
1306
1307 union {
1308 struct cmsghdr cmsghdr;
1309
1310 /* We use NAME_MAX space for the SELinux label
1311 * here. The kernel currently enforces no
1312 * limit, but according to suggestions from
1313 * the SELinux people this will change and it
1314 * will probably be identical to NAME_MAX. For
1315 * now we use that, but this should be updated
1316 * one day when the final limit is known. */
1317 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1318 CMSG_SPACE(sizeof(struct timeval)) +
1319 CMSG_SPACE(sizeof(int)) + /* fd */
1320 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1321 } control = {};
1322
1323 union sockaddr_union sa = {};
1324
1325 struct msghdr msghdr = {
1326 .msg_iov = &iovec,
1327 .msg_iovlen = 1,
1328 .msg_control = &control,
1329 .msg_controllen = sizeof(control),
1330 .msg_name = &sa,
1331 .msg_namelen = sizeof(sa),
1332 };
f9a810be 1333
d025f1e4 1334 assert(s);
875c2e22 1335 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1336
1337 if (revents != EPOLLIN) {
1338 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1339 return -EIO;
1340 }
1341
a315ac4e
LP
1342 /* Try to get the right size, if we can. (Not all
1343 * sockets support SIOCINQ, hence we just try, but
1344 * don't rely on it. */
1345 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1346
a315ac4e
LP
1347 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1348 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1349 (size_t) LINE_MAX,
1350 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1351
a315ac4e
LP
1352 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1353 return log_oom();
875c2e22 1354
a315ac4e
LP
1355 iovec.iov_base = s->buffer;
1356 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1357
a315ac4e
LP
1358 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1359 if (n < 0) {
1360 if (errno == EINTR || errno == EAGAIN)
1361 return 0;
875c2e22 1362
a315ac4e
LP
1363 return log_error_errno(errno, "recvmsg() failed: %m");
1364 }
875c2e22 1365
a315ac4e
LP
1366 CMSG_FOREACH(cmsg, &msghdr) {
1367
1368 if (cmsg->cmsg_level == SOL_SOCKET &&
1369 cmsg->cmsg_type == SCM_CREDENTIALS &&
1370 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1371 ucred = (struct ucred*) CMSG_DATA(cmsg);
1372 else if (cmsg->cmsg_level == SOL_SOCKET &&
1373 cmsg->cmsg_type == SCM_SECURITY) {
1374 label = (char*) CMSG_DATA(cmsg);
1375 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1376 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1377 cmsg->cmsg_type == SO_TIMESTAMP &&
1378 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1379 tv = (struct timeval*) CMSG_DATA(cmsg);
1380 else if (cmsg->cmsg_level == SOL_SOCKET &&
1381 cmsg->cmsg_type == SCM_RIGHTS) {
1382 fds = (int*) CMSG_DATA(cmsg);
1383 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1384 }
a315ac4e 1385 }
d025f1e4 1386
a315ac4e
LP
1387 /* And a trailing NUL, just in case */
1388 s->buffer[n] = 0;
1389
1390 if (fd == s->syslog_fd) {
1391 if (n > 0 && n_fds == 0)
1392 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1393 else if (n_fds > 0)
1394 log_warning("Got file descriptors via syslog socket. Ignoring.");
1395
1396 } else if (fd == s->native_fd) {
1397 if (n > 0 && n_fds == 0)
1398 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1399 else if (n == 0 && n_fds == 1)
1400 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1401 else if (n_fds > 0)
1402 log_warning("Got too many file descriptors via native socket. Ignoring.");
1403
1404 } else {
1405 assert(fd == s->audit_fd);
1406
1407 if (n > 0 && n_fds == 0)
1408 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1409 else if (n_fds > 0)
1410 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1411 }
a315ac4e
LP
1412
1413 close_many(fds, n_fds);
1414 return 0;
f9a810be 1415}
d025f1e4 1416
f9a810be
LP
1417static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1418 Server *s = userdata;
33d52ab9 1419 int r;
d025f1e4 1420
f9a810be 1421 assert(s);
d025f1e4 1422
94b65516 1423 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1424
929eeb54 1425 (void) server_flush_to_var(s);
f9a810be 1426 server_sync(s);
8580d1f7 1427 server_vacuum(s, false, false);
d025f1e4 1428
33d52ab9
LP
1429 r = touch("/run/systemd/journal/flushed");
1430 if (r < 0)
1431 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1432
18e758bf 1433 server_space_usage_message(s, NULL);
f9a810be
LP
1434 return 0;
1435}
d025f1e4 1436
f9a810be
LP
1437static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1438 Server *s = userdata;
33d52ab9 1439 int r;
d025f1e4 1440
f9a810be 1441 assert(s);
d025f1e4 1442
94b65516 1443 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1444 server_rotate(s);
8580d1f7 1445 server_vacuum(s, true, true);
d025f1e4 1446
dbd6e31c 1447 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1448 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1449 if (r < 0)
1450 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1451
f9a810be
LP
1452 return 0;
1453}
d025f1e4 1454
f9a810be
LP
1455static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1456 Server *s = userdata;
d025f1e4 1457
f9a810be 1458 assert(s);
d025f1e4 1459
4daf54a8 1460 log_received_signal(LOG_INFO, si);
d025f1e4 1461
6203e07a 1462 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1463 return 0;
1464}
1465
94b65516
LP
1466static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1467 Server *s = userdata;
33d52ab9 1468 int r;
94b65516
LP
1469
1470 assert(s);
1471
1472 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1473
1474 server_sync(s);
1475
1476 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1477 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1478 if (r < 0)
1479 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1480
1481 return 0;
1482}
1483
f9a810be 1484static int setup_signals(Server *s) {
f9a810be 1485 int r;
d025f1e4
ZJS
1486
1487 assert(s);
1488
94b65516 1489 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1490
151b9b96 1491 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1492 if (r < 0)
1493 return r;
1494
151b9b96 1495 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1496 if (r < 0)
1497 return r;
d025f1e4 1498
151b9b96 1499 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1500 if (r < 0)
1501 return r;
d025f1e4 1502
b374689c
LP
1503 /* Let's process SIGTERM late, so that we flush all queued
1504 * messages to disk before we exit */
1505 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1506 if (r < 0)
1507 return r;
1508
1509 /* When journald is invoked on the terminal (when debugging),
1510 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1511 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1512 if (r < 0)
1513 return r;
d025f1e4 1514
b374689c
LP
1515 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1516 if (r < 0)
1517 return r;
1518
94b65516
LP
1519 /* SIGRTMIN+1 causes an immediate sync. We process this very
1520 * late, so that everything else queued at this point is
1521 * really written to disk. Clients can watch
1522 * /run/systemd/journal/synced with inotify until its mtime
1523 * changes to see when a sync happened. */
1524 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1525 if (r < 0)
1526 return r;
1527
1528 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1529 if (r < 0)
1530 return r;
1531
d025f1e4
ZJS
1532 return 0;
1533}
1534
1535static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1536 _cleanup_free_ char *line = NULL;
d581d9d9 1537 const char *p;
74df0fca 1538 int r;
d025f1e4 1539
74df0fca 1540 r = proc_cmdline(&line);
b5884878 1541 if (r < 0) {
da927ba9 1542 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1543 return 0;
b5884878 1544 }
d025f1e4 1545
d581d9d9 1546 p = line;
9ed794a3 1547 for (;;) {
ff82c36c 1548 _cleanup_free_ char *word = NULL;
d025f1e4 1549
d581d9d9
SS
1550 r = extract_first_word(&p, &word, NULL, 0);
1551 if (r < 0)
1552 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1553
1554 if (r == 0)
1555 break;
d025f1e4
ZJS
1556
1557 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1558 r = parse_boolean(word + 35);
1559 if (r < 0)
1560 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1561 else
1562 s->forward_to_syslog = r;
1563 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1564 r = parse_boolean(word + 33);
1565 if (r < 0)
1566 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1567 else
1568 s->forward_to_kmsg = r;
1569 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1570 r = parse_boolean(word + 36);
1571 if (r < 0)
1572 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1573 else
1574 s->forward_to_console = r;
40b71e89
ST
1575 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1576 r = parse_boolean(word + 33);
1577 if (r < 0)
1578 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1579 else
1580 s->forward_to_wall = r;
d025f1e4
ZJS
1581 } else if (startswith(word, "systemd.journald"))
1582 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1583 }
1584
804ae586 1585 /* do not warn about state here, since probably systemd already did */
db91ea32 1586 return 0;
d025f1e4
ZJS
1587}
1588
1589static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1590 assert(s);
1591
43688c49 1592 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1593 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1594 "Journal\0",
1595 config_item_perf_lookup, journald_gperf_lookup,
1596 false, s);
d025f1e4
ZJS
1597}
1598
f9a810be
LP
1599static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1600 Server *s = userdata;
26687bf8
OS
1601
1602 assert(s);
1603
f9a810be 1604 server_sync(s);
26687bf8
OS
1605 return 0;
1606}
1607
d07f7b9e 1608int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1609 int r;
1610
26687bf8
OS
1611 assert(s);
1612
d07f7b9e
LP
1613 if (priority <= LOG_CRIT) {
1614 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1615 server_sync(s);
1616 return 0;
1617 }
1618
26687bf8
OS
1619 if (s->sync_scheduled)
1620 return 0;
1621
f9a810be
LP
1622 if (s->sync_interval_usec > 0) {
1623 usec_t when;
ca267016 1624
6a0f1f6d 1625 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1626 if (r < 0)
1627 return r;
26687bf8 1628
f9a810be
LP
1629 when += s->sync_interval_usec;
1630
1631 if (!s->sync_event_source) {
6a0f1f6d
LP
1632 r = sd_event_add_time(
1633 s->event,
1634 &s->sync_event_source,
1635 CLOCK_MONOTONIC,
1636 when, 0,
1637 server_dispatch_sync, s);
f9a810be
LP
1638 if (r < 0)
1639 return r;
1640
1641 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1642 } else {
1643 r = sd_event_source_set_time(s->sync_event_source, when);
1644 if (r < 0)
1645 return r;
1646
1647 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1648 }
26687bf8 1649 if (r < 0)
f9a810be 1650 return r;
26687bf8 1651
f9a810be
LP
1652 s->sync_scheduled = true;
1653 }
26687bf8
OS
1654
1655 return 0;
1656}
1657
0c24bb23
LP
1658static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1659 Server *s = userdata;
1660
1661 assert(s);
1662
1663 server_cache_hostname(s);
1664 return 0;
1665}
1666
1667static int server_open_hostname(Server *s) {
1668 int r;
1669
1670 assert(s);
1671
1672 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1673 if (s->hostname_fd < 0)
1674 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1675
151b9b96 1676 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1677 if (r < 0) {
28def94c
DR
1678 /* kernels prior to 3.2 don't support polling this file. Ignore
1679 * the failure. */
1680 if (r == -EPERM) {
e53fc357 1681 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1682 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1683 return 0;
1684 }
1685
23bbb0de 1686 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1687 }
1688
1689 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1690 if (r < 0)
1691 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1692
1693 return 0;
1694}
1695
e22aa3d3
LP
1696static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1697 Server *s = userdata;
1698 int r;
1699
1700 assert(s);
1701 assert(s->notify_event_source == es);
1702 assert(s->notify_fd == fd);
1703
e22aa3d3 1704 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1705 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1706 * READY=1 event or an stdout stream event. If there's nothing
1707 * to write anymore, turn our event source off. The next time
1708 * there's something to send it will be turned on again. */
e22aa3d3
LP
1709
1710 if (!s->sent_notify_ready) {
1711 static const char p[] =
1712 "READY=1\n"
1713 "STATUS=Processing requests...";
1714 ssize_t l;
1715
1716 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1717 if (l < 0) {
1718 if (errno == EAGAIN)
1719 return 0;
1720
1721 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1722 }
1723
1724 s->sent_notify_ready = true;
1725 log_debug("Sent READY=1 notification.");
1726
119e9655
LP
1727 } else if (s->send_watchdog) {
1728
1729 static const char p[] =
1730 "WATCHDOG=1";
1731
1732 ssize_t l;
1733
1734 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1735 if (l < 0) {
1736 if (errno == EAGAIN)
1737 return 0;
1738
1739 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1740 }
1741
1742 s->send_watchdog = false;
1743 log_debug("Sent WATCHDOG=1 notification.");
1744
e22aa3d3
LP
1745 } else if (s->stdout_streams_notify_queue)
1746 /* Dispatch one stream notification event */
1747 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1748
61233823 1749 /* Leave us enabled if there's still more to do. */
119e9655 1750 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1751 return 0;
1752
1753 /* There was nothing to do anymore, let's turn ourselves off. */
1754 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1755 if (r < 0)
1756 return log_error_errno(r, "Failed to turn off notify event source: %m");
1757
1758 return 0;
1759}
1760
119e9655
LP
1761static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1762 Server *s = userdata;
1763 int r;
1764
1765 assert(s);
1766
1767 s->send_watchdog = true;
1768
1769 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1770 if (r < 0)
1771 log_warning_errno(r, "Failed to turn on notify event source: %m");
1772
1773 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1774 if (r < 0)
1775 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1776
1777 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1778 if (r < 0)
1779 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1780
1781 return 0;
1782}
1783
e22aa3d3
LP
1784static int server_connect_notify(Server *s) {
1785 union sockaddr_union sa = {
1786 .un.sun_family = AF_UNIX,
1787 };
1788 const char *e;
1789 int r;
1790
1791 assert(s);
1792 assert(s->notify_fd < 0);
1793 assert(!s->notify_event_source);
1794
1795 /*
1796 So here's the problem: we'd like to send notification
1797 messages to PID 1, but we cannot do that via sd_notify(),
1798 since that's synchronous, and we might end up blocking on
1799 it. Specifically: given that PID 1 might block on
1800 dbus-daemon during IPC, and dbus-daemon is logging to us,
1801 and might hence block on us, we might end up in a deadlock
ccddd104 1802 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1803 generating a full blocking circle. To avoid this, let's
1804 create a non-blocking socket, and connect it to the
1805 notification socket, and then wait for POLLOUT before we
1806 send anything. This should efficiently avoid any deadlocks,
1807 as we'll never block on PID 1, hence PID 1 can safely block
1808 on dbus-daemon which can safely block on us again.
1809
1810 Don't think that this issue is real? It is, see:
1811 https://github.com/systemd/systemd/issues/1505
1812 */
1813
1814 e = getenv("NOTIFY_SOCKET");
1815 if (!e)
1816 return 0;
1817
1818 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1819 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1820 return -EINVAL;
1821 }
1822
1823 if (strlen(e) > sizeof(sa.un.sun_path)) {
1824 log_error("NOTIFY_SOCKET path too long: %s", e);
1825 return -EINVAL;
1826 }
1827
1828 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1829 if (s->notify_fd < 0)
1830 return log_error_errno(errno, "Failed to create notify socket: %m");
1831
1832 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1833
1834 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1835 if (sa.un.sun_path[0] == '@')
1836 sa.un.sun_path[0] = 0;
1837
fc2fffe7 1838 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1839 if (r < 0)
1840 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1841
1842 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1843 if (r < 0)
1844 return log_error_errno(r, "Failed to watch notification socket: %m");
1845
119e9655
LP
1846 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1847 s->send_watchdog = true;
1848
4de2402b 1849 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1850 if (r < 0)
1851 return log_error_errno(r, "Failed to add watchdog time event: %m");
1852 }
1853
e22aa3d3
LP
1854 /* This should fire pretty soon, which we'll use to send the
1855 * READY=1 event. */
1856
1857 return 0;
1858}
1859
d025f1e4 1860int server_init(Server *s) {
13790add 1861 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1862 int n, r, fd;
7d18d348 1863 bool no_sockets;
d025f1e4
ZJS
1864
1865 assert(s);
1866
1867 zero(*s);
e22aa3d3 1868 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1869 s->compress = true;
1870 s->seal = true;
1871
119e9655
LP
1872 s->watchdog_usec = USEC_INFINITY;
1873
26687bf8
OS
1874 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1875 s->sync_scheduled = false;
1876
d025f1e4
ZJS
1877 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1878 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1879
40b71e89 1880 s->forward_to_wall = true;
d025f1e4 1881
e150e820
MB
1882 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1883
d025f1e4
ZJS
1884 s->max_level_store = LOG_DEBUG;
1885 s->max_level_syslog = LOG_DEBUG;
1886 s->max_level_kmsg = LOG_NOTICE;
1887 s->max_level_console = LOG_INFO;
40b71e89 1888 s->max_level_wall = LOG_EMERG;
d025f1e4 1889
266a4700
FB
1890 journal_reset_metrics(&s->system_storage.metrics);
1891 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1892
1893 server_parse_config_file(s);
1894 server_parse_proc_cmdline(s);
8580d1f7 1895
d288f79f 1896 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1897 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1898 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1899 s->rate_limit_interval = s->rate_limit_burst = 0;
1900 }
d025f1e4 1901
8580d1f7 1902 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1903
43cf8388 1904 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1905 if (!s->user_journals)
1906 return log_oom();
1907
1908 s->mmap = mmap_cache_new();
1909 if (!s->mmap)
1910 return log_oom();
1911
b58c888f
VC
1912 s->deferred_closes = set_new(NULL);
1913 if (!s->deferred_closes)
1914 return log_oom();
1915
f9a810be 1916 r = sd_event_default(&s->event);
23bbb0de
MS
1917 if (r < 0)
1918 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1919
1920 n = sd_listen_fds(true);
23bbb0de
MS
1921 if (n < 0)
1922 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1923
1924 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1925
1926 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1927
1928 if (s->native_fd >= 0) {
1929 log_error("Too many native sockets passed.");
1930 return -EINVAL;
1931 }
1932
1933 s->native_fd = fd;
1934
1935 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1936
1937 if (s->stdout_fd >= 0) {
1938 log_error("Too many stdout sockets passed.");
1939 return -EINVAL;
1940 }
1941
1942 s->stdout_fd = fd;
1943
03ee5c38
LP
1944 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1945 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1946
1947 if (s->syslog_fd >= 0) {
1948 log_error("Too many /dev/log sockets passed.");
1949 return -EINVAL;
1950 }
1951
1952 s->syslog_fd = fd;
1953
875c2e22
LP
1954 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1955
1956 if (s->audit_fd >= 0) {
1957 log_error("Too many audit sockets passed.");
1958 return -EINVAL;
1959 }
1960
1961 s->audit_fd = fd;
1962
4ec3cd73 1963 } else {
4ec3cd73 1964
13790add
LP
1965 if (!fds) {
1966 fds = fdset_new();
1967 if (!fds)
1968 return log_oom();
1969 }
4ec3cd73 1970
13790add
LP
1971 r = fdset_put(fds, fd);
1972 if (r < 0)
1973 return log_oom();
4ec3cd73 1974 }
d025f1e4
ZJS
1975 }
1976
15d91bff
ZJS
1977 /* Try to restore streams, but don't bother if this fails */
1978 (void) server_restore_streams(s, fds);
d025f1e4 1979
13790add
LP
1980 if (fdset_size(fds) > 0) {
1981 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1982 fds = fdset_free(fds);
1983 }
1984
7d18d348
ZJS
1985 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1986
1987 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1988
1989 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1990 r = server_open_stdout_socket(s);
1991 if (r < 0)
1992 return r;
1993
37b7affe 1994 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1995 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1996 if (r < 0)
1997 return r;
1998
37b7affe 1999 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 2000 r = server_open_native_socket(s);
d025f1e4
ZJS
2001 if (r < 0)
2002 return r;
2003
37b7affe 2004 /* /dev/ksmg */
d025f1e4
ZJS
2005 r = server_open_dev_kmsg(s);
2006 if (r < 0)
2007 return r;
2008
7d18d348
ZJS
2009 /* Unless we got *some* sockets and not audit, open audit socket */
2010 if (s->audit_fd >= 0 || no_sockets) {
2011 r = server_open_audit(s);
2012 if (r < 0)
2013 return r;
2014 }
875c2e22 2015
d025f1e4
ZJS
2016 r = server_open_kernel_seqnum(s);
2017 if (r < 0)
2018 return r;
2019
0c24bb23
LP
2020 r = server_open_hostname(s);
2021 if (r < 0)
2022 return r;
2023
f9a810be 2024 r = setup_signals(s);
d025f1e4
ZJS
2025 if (r < 0)
2026 return r;
2027
2028 s->udev = udev_new();
2029 if (!s->udev)
2030 return -ENOMEM;
2031
f9a810be 2032 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2033 if (!s->rate_limit)
2034 return -ENOMEM;
2035
e9174f29
LP
2036 r = cg_get_root_path(&s->cgroup_root);
2037 if (r < 0)
2038 return r;
2039
0c24bb23
LP
2040 server_cache_hostname(s);
2041 server_cache_boot_id(s);
2042 server_cache_machine_id(s);
2043
266a4700
FB
2044 s->runtime_storage.name = "Runtime journal";
2045 s->system_storage.name = "System journal";
2046
2047 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL);
2048 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL);
2049 if (!s->runtime_storage.path || !s->system_storage.path)
2050 return -ENOMEM;
2051
e22aa3d3
LP
2052 (void) server_connect_notify(s);
2053
804ae586 2054 return system_journal_open(s, false);
d025f1e4
ZJS
2055}
2056
2057void server_maybe_append_tags(Server *s) {
2058#ifdef HAVE_GCRYPT
2059 JournalFile *f;
2060 Iterator i;
2061 usec_t n;
2062
2063 n = now(CLOCK_REALTIME);
2064
2065 if (s->system_journal)
2066 journal_file_maybe_append_tag(s->system_journal, n);
2067
43cf8388 2068 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2069 journal_file_maybe_append_tag(f, n);
2070#endif
2071}
2072
2073void server_done(Server *s) {
2074 JournalFile *f;
2075 assert(s);
2076
b58c888f
VC
2077 if (s->deferred_closes) {
2078 journal_file_close_set(s->deferred_closes);
2079 set_free(s->deferred_closes);
2080 }
2081
d025f1e4
ZJS
2082 while (s->stdout_streams)
2083 stdout_stream_free(s->stdout_streams);
2084
2085 if (s->system_journal)
69a3a6fd 2086 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2087
2088 if (s->runtime_journal)
69a3a6fd 2089 (void) journal_file_close(s->runtime_journal);
d025f1e4 2090
43cf8388 2091 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2092 (void) journal_file_close(f);
d025f1e4 2093
43cf8388 2094 ordered_hashmap_free(s->user_journals);
d025f1e4 2095
f9a810be
LP
2096 sd_event_source_unref(s->syslog_event_source);
2097 sd_event_source_unref(s->native_event_source);
2098 sd_event_source_unref(s->stdout_event_source);
2099 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2100 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2101 sd_event_source_unref(s->sync_event_source);
2102 sd_event_source_unref(s->sigusr1_event_source);
2103 sd_event_source_unref(s->sigusr2_event_source);
2104 sd_event_source_unref(s->sigterm_event_source);
2105 sd_event_source_unref(s->sigint_event_source);
94b65516 2106 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2107 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2108 sd_event_source_unref(s->notify_event_source);
119e9655 2109 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2110 sd_event_unref(s->event);
d025f1e4 2111
03e334a1
LP
2112 safe_close(s->syslog_fd);
2113 safe_close(s->native_fd);
2114 safe_close(s->stdout_fd);
2115 safe_close(s->dev_kmsg_fd);
875c2e22 2116 safe_close(s->audit_fd);
03e334a1 2117 safe_close(s->hostname_fd);
e22aa3d3 2118 safe_close(s->notify_fd);
0c24bb23 2119
d025f1e4
ZJS
2120 if (s->rate_limit)
2121 journal_rate_limit_free(s->rate_limit);
2122
2123 if (s->kernel_seqnum)
2124 munmap(s->kernel_seqnum, sizeof(uint64_t));
2125
2126 free(s->buffer);
2127 free(s->tty_path);
e9174f29 2128 free(s->cgroup_root);
99d0966e 2129 free(s->hostname_field);
d025f1e4
ZJS
2130
2131 if (s->mmap)
2132 mmap_cache_unref(s->mmap);
2133
3e044c49 2134 udev_unref(s->udev);
d025f1e4 2135}
8580d1f7
LP
2136
2137static const char* const storage_table[_STORAGE_MAX] = {
2138 [STORAGE_AUTO] = "auto",
2139 [STORAGE_VOLATILE] = "volatile",
2140 [STORAGE_PERSISTENT] = "persistent",
2141 [STORAGE_NONE] = "none"
2142};
2143
2144DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2145DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2146
2147static const char* const split_mode_table[_SPLIT_MAX] = {
2148 [SPLIT_LOGIN] = "login",
2149 [SPLIT_UID] = "uid",
2150 [SPLIT_NONE] = "none",
2151};
2152
2153DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2154DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");