]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
NEWS: option is ProtectKernelTunables not ProtectedKernelTunables (#4451)
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
d025f1e4 74
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at the same time; find_journal() closes
 * the oldest entries once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): presumably applied when the configuration does not override them; the
 * code consuming these is not part of this section of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* Minimum age of the cached disk space figures before cache_space_refresh() measures them again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket; confirm at the
 * place where it is used. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88
e0ed6db9
FB
89static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
90 _cleanup_closedir_ DIR *d = NULL;
91 struct dirent *de;
92 struct statvfs ss;
e0ed6db9
FB
93
94 assert(ret_used);
95 assert(ret_free);
96
266a4700 97 d = opendir(path);
e0ed6db9
FB
98 if (!d)
99 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 100 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
101
102 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 103 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
104
105 *ret_free = ss.f_bsize * ss.f_bavail;
106 *ret_used = 0;
107 FOREACH_DIRENT_ALL(de, d, break) {
108 struct stat st;
109
110 if (!endswith(de->d_name, ".journal") &&
111 !endswith(de->d_name, ".journal~"))
112 continue;
113
114 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 115 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
116 continue;
117 }
118
119 if (!S_ISREG(st.st_mode))
120 continue;
121
122 *ret_used += (uint64_t) st.st_blocks * 512UL;
123 }
124
125 return 0;
126}
127
a0edc477
FB
128static void cache_space_invalidate(JournalStorageSpace *space) {
129 memset(space, 0, sizeof(*space));
130}
131
57f443a6 132static int cache_space_refresh(Server *s, JournalStorage *storage) {
8580d1f7 133
7fd1b19b 134 _cleanup_closedir_ DIR *d = NULL;
23aba343 135 JournalStorageSpace *space;
266a4700 136 JournalMetrics *metrics;
23aba343 137 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 138 usec_t ts;
e0ed6db9 139 int r;
d025f1e4 140
8580d1f7 141 assert(s);
266a4700 142
266a4700 143 metrics = &storage->metrics;
23aba343 144 space = &storage->space;
d025f1e4 145
8580d1f7 146 ts = now(CLOCK_MONOTONIC);
d025f1e4 147
57f443a6 148 if (space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
149 return 0;
150
23aba343 151 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
152 if (r < 0)
153 return r;
d025f1e4 154
23aba343
FB
155 space->vfs_used = vfs_used;
156 space->vfs_available = vfs_avail;
157
158 avail = LESS_BY(vfs_avail, metrics->keep_free);
159
23aba343
FB
160 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
161 space->available = LESS_BY(space->limit, vfs_used);
162 space->timestamp = ts;
8580d1f7
LP
163 return 1;
164}
165
3a19f215
FB
166static void patch_min_use(JournalStorage *storage) {
167 assert(storage);
168
169 /* Let's bump the min_use limit to the current usage on disk. We do
170 * this when starting up and first opening the journal files. This way
171 * sudden spikes in disk usage will not cause journald to vacuum files
172 * without bounds. Note that this means that only a restart of journald
173 * will make it reset this value. */
174
175 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
176}
177
178
179static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 180 JournalStorage *js;
57f443a6 181 int r;
8580d1f7
LP
182
183 assert(s);
184
266a4700 185 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
186
187 r = cache_space_refresh(s, js);
188 if (r >= 0) {
189 if (available)
190 *available = js->space.available;
191 if (limit)
192 *limit = js->space.limit;
193 }
194 return r;
d025f1e4
ZJS
195}
196
cba5629e
FB
197void server_space_usage_message(Server *s, JournalStorage *storage) {
198 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
199 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
200 JournalMetrics *metrics;
cba5629e
FB
201
202 assert(s);
203
204 if (!storage)
205 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
206
57f443a6 207 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
208 return;
209
210 metrics = &storage->metrics;
23aba343 211 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
212 format_bytes(fb2, sizeof(fb2), metrics->max_use);
213 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 214 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
215 format_bytes(fb5, sizeof(fb5), storage->space.limit);
216 format_bytes(fb6, sizeof(fb6), storage->space.available);
217
218 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
219 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
220 storage->name, storage->path, fb1, fb5, fb6),
221 "JOURNAL_NAME=%s", storage->name,
222 "JOURNAL_PATH=%s", storage->path,
23aba343 223 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
224 "CURRENT_USE_PRETTY=%s", fb1,
225 "MAX_USE=%"PRIu64, metrics->max_use,
226 "MAX_USE_PRETTY=%s", fb2,
227 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
228 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 229 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
230 "DISK_AVAILABLE_PRETTY=%s", fb4,
231 "LIMIT=%"PRIu64, storage->space.limit,
232 "LIMIT_PRETTY=%s", fb5,
233 "AVAILABLE=%"PRIu64, storage->space.available,
234 "AVAILABLE_PRETTY=%s", fb6,
235 NULL);
236}
237
5c3bde3f 238static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 239#ifdef HAVE_ACL
5c3bde3f 240 int r;
d025f1e4 241#endif
d025f1e4
ZJS
242 assert(f);
243
d025f1e4 244#ifdef HAVE_ACL
34c10968 245 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
246 return;
247
5c3bde3f
ZJS
248 r = add_acls_for_user(f->fd, uid);
249 if (r < 0)
250 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
251#endif
252}
253
7a24f3bf
VC
254static int open_journal(
255 Server *s,
256 bool reliably,
257 const char *fname,
258 int flags,
259 bool seal,
260 JournalMetrics *metrics,
7a24f3bf
VC
261 JournalFile **ret) {
262 int r;
e167d7fd 263 JournalFile *f;
7a24f3bf
VC
264
265 assert(s);
266 assert(fname);
267 assert(ret);
268
269 if (reliably)
b58c888f 270 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 271 else
5d1ce257 272 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
273 if (r < 0)
274 return r;
275
e167d7fd 276 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 277 if (r < 0) {
69a3a6fd 278 (void) journal_file_close(f);
7a24f3bf
VC
279 return r;
280 }
281
e167d7fd 282 *ret = f;
7a24f3bf
VC
283 return r;
284}
285
6431c7e2
VC
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        /* access() returns 0 on success and -1 on failure */
        return access("/run/systemd/journal/flushed", F_OK) == 0;
}
289
105bdb46 290static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 291 bool flushed = false;
105bdb46
VC
292 const char *fn;
293 int r = 0;
294
295 if (!s->system_journal &&
296 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 297 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
298
299 /* If in auto mode: first try to create the machine
300 * path, but not the prefix.
301 *
302 * If in persistent mode: create /var/log/journal and
303 * the machine path */
304
305 if (s->storage == STORAGE_PERSISTENT)
306 (void) mkdir_p("/var/log/journal/", 0755);
307
266a4700 308 (void) mkdir(s->system_storage.path, 0755);
105bdb46 309
266a4700
FB
310 fn = strjoina(s->system_storage.path, "/system.journal");
311 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
312 if (r >= 0) {
313 server_add_acls(s->system_journal, 0);
57f443a6 314 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 315 patch_min_use(&s->system_storage);
105bdb46
VC
316 } else if (r < 0) {
317 if (r != -ENOENT && r != -EROFS)
318 log_warning_errno(r, "Failed to open system journal: %m");
319
320 r = 0;
321 }
929eeb54
VC
322
323 /* If the runtime journal is open, and we're post-flush, we're
324 * recovering from a failed system journal rotate (ENOSPC)
325 * for which the runtime journal was reopened.
326 *
327 * Perform an implicit flush to var, leaving the runtime
328 * journal closed, now that the system journal is back.
329 */
330 if (s->runtime_journal && flushed)
331 (void) server_flush_to_var(s);
105bdb46
VC
332 }
333
334 if (!s->runtime_journal &&
335 (s->storage != STORAGE_NONE)) {
336
266a4700 337 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
338
339 if (s->system_journal) {
340
341 /* Try to open the runtime journal, but only
342 * if it already exists, so that we can flush
343 * it into the system journal */
344
266a4700 345 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
346 if (r < 0) {
347 if (r != -ENOENT)
348 log_warning_errno(r, "Failed to open runtime journal: %m");
349
350 r = 0;
351 }
352
353 } else {
354
355 /* OK, we really need the runtime journal, so create
356 * it if necessary. */
357
358 (void) mkdir("/run/log", 0755);
359 (void) mkdir("/run/log/journal", 0755);
360 (void) mkdir_parents(fn, 0750);
361
266a4700 362 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
363 if (r < 0)
364 return log_error_errno(r, "Failed to open runtime journal: %m");
365 }
366
367 if (s->runtime_journal) {
368 server_add_acls(s->runtime_journal, 0);
57f443a6 369 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 370 patch_min_use(&s->runtime_storage);
105bdb46
VC
371 }
372 }
373
374 return r;
375}
376
d025f1e4 377static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 378 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
379 int r;
380 JournalFile *f;
381 sd_id128_t machine;
382
383 assert(s);
384
105bdb46
VC
385 /* A rotate that fails to create the new journal (ENOSPC) leaves the
386 * rotated journal as NULL. Unless we revisit opening, even after
387 * space is made available we'll continue to return NULL indefinitely.
388 *
389 * system_journal_open() is a noop if the journals are already open, so
390 * we can just call it here to recover from failed rotates (or anything
391 * else that's left the journals as NULL).
392 *
393 * Fixes https://github.com/systemd/systemd/issues/3968 */
394 (void) system_journal_open(s, false);
395
d025f1e4
ZJS
396 /* We split up user logs only on /var, not on /run. If the
397 * runtime file is open, we write to it exclusively, in order
398 * to guarantee proper order as soon as we flush /run to
399 * /var and close the runtime file. */
400
401 if (s->runtime_journal)
402 return s->runtime_journal;
403
61755fda 404 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
405 return s->system_journal;
406
407 r = sd_id128_get_machine(&machine);
408 if (r < 0)
409 return s->system_journal;
410
4a0b58c4 411 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
412 if (f)
413 return f;
414
de0671ee
ZJS
415 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
416 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
417 return s->system_journal;
418
43cf8388 419 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 420 /* Too many open? Then let's close one */
43cf8388 421 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 422 assert(f);
69a3a6fd 423 (void) journal_file_close(f);
d025f1e4
ZJS
424 }
425
266a4700 426 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
427 if (r < 0)
428 return s->system_journal;
429
5c3bde3f 430 server_add_acls(f, uid);
d025f1e4 431
4a0b58c4 432 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 433 if (r < 0) {
69a3a6fd 434 (void) journal_file_close(f);
d025f1e4
ZJS
435 return s->system_journal;
436 }
437
438 return f;
439}
440
ea69bd41
LP
441static int do_rotate(
442 Server *s,
443 JournalFile **f,
444 const char* name,
445 bool seal,
446 uint32_t uid) {
447
fc55baee
ZJS
448 int r;
449 assert(s);
450
451 if (!*f)
452 return -EINVAL;
453
b58c888f 454 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
455 if (r < 0)
456 if (*f)
ea69bd41 457 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 458 else
ea69bd41 459 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 460 else
5c3bde3f 461 server_add_acls(*f, uid);
2678031a 462
fc55baee
ZJS
463 return r;
464}
465
d025f1e4
ZJS
466void server_rotate(Server *s) {
467 JournalFile *f;
468 void *k;
469 Iterator i;
470 int r;
471
472 log_debug("Rotating...");
473
8580d1f7
LP
474 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
475 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 476
43cf8388 477 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 478 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 479 if (r >= 0)
43cf8388 480 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
481 else if (!f)
482 /* Old file has been closed and deallocated */
43cf8388 483 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 484 }
b58c888f
VC
485
486 /* Perform any deferred closes which aren't still offlining. */
487 SET_FOREACH(f, s->deferred_closes, i)
488 if (!journal_file_is_offlining(f)) {
489 (void) set_remove(s->deferred_closes, f);
490 (void) journal_file_close(f);
491 }
d025f1e4
ZJS
492}
493
26687bf8
OS
494void server_sync(Server *s) {
495 JournalFile *f;
26687bf8
OS
496 Iterator i;
497 int r;
498
26687bf8 499 if (s->system_journal) {
ac2e41f5 500 r = journal_file_set_offline(s->system_journal, false);
26687bf8 501 if (r < 0)
65089b82 502 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
503 }
504
65c1d46b 505 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 506 r = journal_file_set_offline(f, false);
26687bf8 507 if (r < 0)
65089b82 508 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
509 }
510
f9a810be
LP
511 if (s->sync_event_source) {
512 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
513 if (r < 0)
da927ba9 514 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 515 }
26687bf8
OS
516
517 s->sync_scheduled = false;
518}
519
3a19f215 520static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 521
63c8666b
ZJS
522 int r;
523
8580d1f7 524 assert(s);
266a4700 525 assert(storage);
8580d1f7 526
57f443a6 527 (void) cache_space_refresh(s, storage);
18e758bf
FB
528
529 if (verbose)
530 server_space_usage_message(s, storage);
8580d1f7 531
57f443a6
FB
532 r = journal_directory_vacuum(storage->path, storage->space.limit,
533 storage->metrics.n_max_files, s->max_retention_usec,
534 &s->oldest_file_usec, verbose);
63c8666b 535 if (r < 0 && r != -ENOENT)
266a4700
FB
536 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
537
a0edc477 538 cache_space_invalidate(&storage->space);
63c8666b
ZJS
539}
540
3a19f215 541int server_vacuum(Server *s, bool verbose) {
8580d1f7 542 assert(s);
d025f1e4
ZJS
543
544 log_debug("Vacuuming...");
545
546 s->oldest_file_usec = 0;
547
266a4700 548 if (s->system_journal)
3a19f215 549 do_vacuum(s, &s->system_storage, verbose);
266a4700 550 if (s->runtime_journal)
3a19f215 551 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 552
8580d1f7 553 return 0;
d025f1e4
ZJS
554}
555
0c24bb23
LP
556static void server_cache_machine_id(Server *s) {
557 sd_id128_t id;
558 int r;
559
560 assert(s);
561
562 r = sd_id128_get_machine(&id);
563 if (r < 0)
564 return;
565
566 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
567}
568
569static void server_cache_boot_id(Server *s) {
570 sd_id128_t id;
571 int r;
572
573 assert(s);
574
575 r = sd_id128_get_boot(&id);
576 if (r < 0)
577 return;
578
579 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
580}
581
582static void server_cache_hostname(Server *s) {
583 _cleanup_free_ char *t = NULL;
584 char *x;
585
586 assert(s);
587
588 t = gethostname_malloc();
589 if (!t)
590 return;
591
592 x = strappend("_HOSTNAME=", t);
593 if (!x)
594 return;
595
596 free(s->hostname_field);
597 s->hostname_field = x;
598}
599
8531ae70 600static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 601 switch(r) {
ae739cc1 602
6e1045e5
ZJS
603 case -E2BIG: /* Hit configured limit */
604 case -EFBIG: /* Hit fs limit */
605 case -EDQUOT: /* Quota limit hit */
606 case -ENOSPC: /* Disk full */
d025f1e4 607 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 608 return true;
ae739cc1 609
6e1045e5
ZJS
610 case -EIO: /* I/O error of some kind (mmap) */
611 log_warning("%s: IO error, rotating.", f->path);
612 return true;
ae739cc1 613
6e1045e5 614 case -EHOSTDOWN: /* Other machine */
d025f1e4 615 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 616 return true;
ae739cc1 617
6e1045e5 618 case -EBUSY: /* Unclean shutdown */
d025f1e4 619 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 620 return true;
ae739cc1 621
6e1045e5 622 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 623 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 624 return true;
ae739cc1 625
6e1045e5
ZJS
626 case -EBADMSG: /* Corrupted */
627 case -ENODATA: /* Truncated */
628 case -ESHUTDOWN: /* Already archived */
d025f1e4 629 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 630 return true;
ae739cc1 631
6e1045e5 632 case -EIDRM: /* Journal file has been deleted */
2678031a 633 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 634 return true;
ae739cc1
LP
635
636 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 637 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
638 return true;
639
6e1045e5 640 default:
d025f1e4 641 return false;
6e1045e5 642 }
d025f1e4
ZJS
643}
644
d07f7b9e 645static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 646 bool vacuumed = false, rotate = false;
0f972d66 647 struct dual_timestamp ts;
d025f1e4 648 JournalFile *f;
d025f1e4
ZJS
649 int r;
650
651 assert(s);
652 assert(iovec);
653 assert(n > 0);
654
0f972d66
LP
655 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
656 * the source time, and not even the time the event was originally seen, but instead simply the time we started
657 * processing it, as we want strictly linear ordering in what we write out.) */
658 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
659 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
660
7c070017
LP
661 if (ts.realtime < s->last_realtime_clock) {
662 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
663 * regular operation. However, when it does happen, then we should make sure that we start fresh files
664 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
665 * bisection works correctly. */
d025f1e4 666
7c070017
LP
667 log_debug("Time jumped backwards, rotating.");
668 rotate = true;
669 } else {
670
671 f = find_journal(s, uid);
672 if (!f)
673 return;
674
675 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
676 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
677 rotate = true;
678 }
679 }
d025f1e4 680
7c070017 681 if (rotate) {
d025f1e4 682 server_rotate(s);
3a19f215 683 server_vacuum(s, false);
d025f1e4
ZJS
684 vacuumed = true;
685
686 f = find_journal(s, uid);
687 if (!f)
688 return;
689 }
690
7c070017
LP
691 s->last_realtime_clock = ts.realtime;
692
0f972d66 693 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 694 if (r >= 0) {
d07f7b9e 695 server_schedule_sync(s, priority);
d025f1e4 696 return;
26687bf8 697 }
d025f1e4
ZJS
698
699 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 700 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
701 return;
702 }
703
704 server_rotate(s);
3a19f215 705 server_vacuum(s, false);
d025f1e4
ZJS
706
707 f = find_journal(s, uid);
708 if (!f)
709 return;
710
711 log_debug("Retrying write.");
0f972d66 712 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
713 if (r < 0)
714 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
715 else
d07f7b9e 716 server_schedule_sync(s, priority);
d025f1e4
ZJS
717}
718
4b58153d
LP
719static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
720 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
721 char *copy, ids[SD_ID128_STRING_MAX];
722 int r;
723
724 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
725 * on the cgroup path. */
726
727 r = cg_slice_to_path(slice, &slice_path);
728 if (r < 0)
729 return r;
730
731 escaped = cg_escape(unit);
732 if (!escaped)
733 return -ENOMEM;
734
735 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
736 if (!p)
737 return -ENOMEM;
738
739 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
740 if (r < 0)
741 return r;
742 if (r != 32)
743 return -EINVAL;
744 ids[32] = 0;
745
746 if (!id128_is_valid(ids))
747 return -EINVAL;
748
749 copy = strdup(ids);
750 if (!copy)
751 return -ENOMEM;
752
753 *ret = copy;
754 return 0;
755}
756
d025f1e4
ZJS
757static void dispatch_message_real(
758 Server *s,
759 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
760 const struct ucred *ucred,
761 const struct timeval *tv,
d025f1e4 762 const char *label, size_t label_len,
968f3196 763 const char *unit_id,
d07f7b9e 764 int priority,
968f3196 765 pid_t object_pid) {
d025f1e4 766
968f3196 767 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
768 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
769 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
770 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 771 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
772 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
773 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
774 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
775 uid_t object_uid;
776 gid_t object_gid;
968f3196 777 char *x;
d025f1e4 778 int r;
ae018d9b 779 char *t, *c;
82499507
LP
780 uid_t realuid = 0, owner = 0, journal_uid;
781 bool owner_valid = false;
ae018d9b 782#ifdef HAVE_AUDIT
968f3196
ZJS
783 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
784 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
785 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
786 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
787
788 uint32_t audit;
789 uid_t loginuid;
790#endif
d025f1e4
ZJS
791
792 assert(s);
793 assert(iovec);
794 assert(n > 0);
d473176a 795 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
796
797 if (ucred) {
d025f1e4
ZJS
798 realuid = ucred->uid;
799
de0671ee 800 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 801 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 802
de0671ee 803 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 804 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 805
de0671ee 806 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 807 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
808
809 r = get_process_comm(ucred->pid, &t);
810 if (r >= 0) {
63c372cb 811 x = strjoina("_COMM=", t);
d025f1e4 812 free(t);
968f3196 813 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
814 }
815
816 r = get_process_exe(ucred->pid, &t);
817 if (r >= 0) {
63c372cb 818 x = strjoina("_EXE=", t);
d025f1e4 819 free(t);
968f3196 820 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
821 }
822
9bdbc2e2 823 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 824 if (r >= 0) {
63c372cb 825 x = strjoina("_CMDLINE=", t);
d025f1e4 826 free(t);
3a832116
SL
827 IOVEC_SET_STRING(iovec[n++], x);
828 }
829
830 r = get_process_capeff(ucred->pid, &t);
831 if (r >= 0) {
63c372cb 832 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 833 free(t);
968f3196 834 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
835 }
836
0a20e3c1 837#ifdef HAVE_AUDIT
d025f1e4 838 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 839 if (r >= 0) {
de0671ee 840 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
841 IOVEC_SET_STRING(iovec[n++], audit_session);
842 }
d025f1e4
ZJS
843
844 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 845 if (r >= 0) {
de0671ee 846 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 847 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 848 }
ae018d9b 849#endif
d025f1e4 850
e9174f29 851 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 852 if (r >= 0) {
4b58153d 853 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
854 char *session = NULL;
855
63c372cb 856 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 857 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 858
ae018d9b
LP
859 r = cg_path_get_session(c, &t);
860 if (r >= 0) {
63c372cb 861 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 862 free(t);
d025f1e4 863 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
864 }
865
866 if (cg_path_get_owner_uid(c, &owner) >= 0) {
867 owner_valid = true;
d025f1e4 868
de0671ee 869 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 870 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 871 }
d025f1e4 872
4b58153d
LP
873 if (cg_path_get_unit(c, &raw_unit) >= 0) {
874 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
875 IOVEC_SET_STRING(iovec[n++], x);
876 } else if (unit_id && !session) {
63c372cb 877 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
878 IOVEC_SET_STRING(iovec[n++], x);
879 }
880
881 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 882 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 883 free(t);
968f3196 884 IOVEC_SET_STRING(iovec[n++], x);
19cace37 885 } else if (unit_id && session) {
63c372cb 886 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
887 IOVEC_SET_STRING(iovec[n++], x);
888 }
ae018d9b 889
4b58153d
LP
890 if (cg_path_get_slice(c, &raw_slice) >= 0) {
891 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
892 IOVEC_SET_STRING(iovec[n++], x);
893 }
894
d473176a
LP
895 if (cg_path_get_user_slice(c, &t) >= 0) {
896 x = strjoina("_SYSTEMD_USER_SLICE=", t);
897 free(t);
898 IOVEC_SET_STRING(iovec[n++], x);
899 }
900
4b58153d
LP
901 if (raw_slice && raw_unit) {
902 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
903 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
904 free(t);
905 IOVEC_SET_STRING(iovec[n++], x);
906 }
907 }
908
ae018d9b 909 free(c);
2d43b190 910 } else if (unit_id) {
63c372cb 911 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 912 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 913 }
d025f1e4 914
d025f1e4 915#ifdef HAVE_SELINUX
6355e756 916 if (mac_selinux_have()) {
d682b3a7 917 if (label) {
f8294e41 918 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 919
d682b3a7
LP
920 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
921 IOVEC_SET_STRING(iovec[n++], x);
922 } else {
2ed96880 923 char *con;
d025f1e4 924
d682b3a7 925 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 926 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 927
d682b3a7
LP
928 freecon(con);
929 IOVEC_SET_STRING(iovec[n++], x);
930 }
d025f1e4
ZJS
931 }
932 }
933#endif
934 }
968f3196
ZJS
935 assert(n <= m);
936
937 if (object_pid) {
938 r = get_process_uid(object_pid, &object_uid);
939 if (r >= 0) {
de0671ee 940 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
941 IOVEC_SET_STRING(iovec[n++], o_uid);
942 }
943
944 r = get_process_gid(object_pid, &object_gid);
945 if (r >= 0) {
de0671ee 946 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
947 IOVEC_SET_STRING(iovec[n++], o_gid);
948 }
949
950 r = get_process_comm(object_pid, &t);
951 if (r >= 0) {
63c372cb 952 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
953 free(t);
954 IOVEC_SET_STRING(iovec[n++], x);
955 }
956
957 r = get_process_exe(object_pid, &t);
958 if (r >= 0) {
63c372cb 959 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
960 free(t);
961 IOVEC_SET_STRING(iovec[n++], x);
962 }
963
964 r = get_process_cmdline(object_pid, 0, false, &t);
965 if (r >= 0) {
63c372cb 966 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
967 free(t);
968 IOVEC_SET_STRING(iovec[n++], x);
969 }
970
971#ifdef HAVE_AUDIT
972 r = audit_session_from_pid(object_pid, &audit);
973 if (r >= 0) {
de0671ee 974 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
975 IOVEC_SET_STRING(iovec[n++], o_audit_session);
976 }
977
978 r = audit_loginuid_from_pid(object_pid, &loginuid);
979 if (r >= 0) {
de0671ee 980 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
981 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
982 }
983#endif
984
e9174f29 985 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 986 if (r >= 0) {
63c372cb 987 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
988 IOVEC_SET_STRING(iovec[n++], x);
989
990 r = cg_path_get_session(c, &t);
991 if (r >= 0) {
63c372cb 992 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
993 free(t);
994 IOVEC_SET_STRING(iovec[n++], x);
995 }
996
997 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 998 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
999 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1000 }
1001
1002 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1003 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1004 free(t);
19cace37
LP
1005 IOVEC_SET_STRING(iovec[n++], x);
1006 }
1007
1008 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1009 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1010 free(t);
968f3196 1011 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1012 }
968f3196 1013
d473176a
LP
1014 if (cg_path_get_slice(c, &t) >= 0) {
1015 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1016 free(t);
1017 IOVEC_SET_STRING(iovec[n++], x);
1018 }
1019
1020 if (cg_path_get_user_slice(c, &t) >= 0) {
1021 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1022 free(t);
1023 IOVEC_SET_STRING(iovec[n++], x);
1024 }
1025
968f3196
ZJS
1026 free(c);
1027 }
1028 }
1029 assert(n <= m);
d025f1e4
ZJS
1030
1031 if (tv) {
398a50cd 1032 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1033 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1034 }
1035
1036 /* Note that strictly speaking storing the boot id here is
1037 * redundant since the entry includes this in-line
1038 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1039 if (!isempty(s->boot_id_field))
1040 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1041
0c24bb23
LP
1042 if (!isempty(s->machine_id_field))
1043 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1044
0c24bb23
LP
1045 if (!isempty(s->hostname_field))
1046 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1047
1048 assert(n <= m);
1049
da499392 1050 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1051 /* Split up strictly by any UID */
759c945a 1052 journal_uid = realuid;
82499507 1053 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1054 /* Split up by login UIDs. We do this only if the
1055 * realuid is not root, in order not to accidentally
1056 * leak privileged information to the user that is
1057 * logged by a privileged process that is part of an
7517e174 1058 * unprivileged session. */
8a0889df 1059 journal_uid = owner;
da499392
KS
1060 else
1061 journal_uid = 0;
759c945a 1062
d07f7b9e 1063 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1064}
1065
1066void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1067 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1068 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1069 unsigned n = 0, m;
32917e33 1070 int r;
d025f1e4 1071 va_list ap;
b92bea5d 1072 struct ucred ucred = {};
d025f1e4
ZJS
1073
1074 assert(s);
1075 assert(format);
1076
4850d39a 1077 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1078 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1079 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1080
d025f1e4 1081 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1082 assert_cc(6 == LOG_INFO);
32917e33 1083 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1084
3bbaff3e 1085 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1086 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1087 IOVEC_SET_STRING(iovec[n++], mid);
1088 }
1089
8a03c9ef
ZJS
1090 m = n;
1091
1092 va_start(ap, format);
32917e33
ZJS
1093 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1094 /* Error handling below */
8a03c9ef
ZJS
1095 va_end(ap);
1096
d025f1e4
ZJS
1097 ucred.pid = getpid();
1098 ucred.uid = getuid();
1099 ucred.gid = getgid();
1100
32917e33
ZJS
1101 if (r >= 0)
1102 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1103
1104 while (m < n)
1105 free(iovec[m++].iov_base);
32917e33
ZJS
1106
1107 if (r < 0) {
1108 /* We failed to format the message. Emit a warning instead. */
1109 char buf[LINE_MAX];
1110
1111 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1112
1113 n = 3;
1114 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1115 IOVEC_SET_STRING(iovec[n++], buf);
1116 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1117 }
d025f1e4
ZJS
1118}
1119
1120void server_dispatch_message(
1121 Server *s,
1122 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1123 const struct ucred *ucred,
1124 const struct timeval *tv,
d025f1e4
ZJS
1125 const char *label, size_t label_len,
1126 const char *unit_id,
968f3196
ZJS
1127 int priority,
1128 pid_t object_pid) {
d025f1e4 1129
7027ff61 1130 int rl, r;
7fd1b19b 1131 _cleanup_free_ char *path = NULL;
8580d1f7 1132 uint64_t available = 0;
db91ea32 1133 char *c;
d025f1e4
ZJS
1134
1135 assert(s);
1136 assert(iovec || n == 0);
1137
1138 if (n == 0)
1139 return;
1140
1141 if (LOG_PRI(priority) > s->max_level_store)
1142 return;
1143
2f5df74a
HHPF
1144 /* Stop early in case the information will not be stored
1145 * in a journal. */
1146 if (s->storage == STORAGE_NONE)
1147 return;
1148
d025f1e4
ZJS
1149 if (!ucred)
1150 goto finish;
1151
e9174f29 1152 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1153 if (r < 0)
d025f1e4
ZJS
1154 goto finish;
1155
1156 /* example: /user/lennart/3/foobar
1157 * /system/dbus.service/foobar
1158 *
1159 * So let's cut of everything past the third /, since that is
1160 * where user directories start */
1161
1162 c = strchr(path, '/');
1163 if (c) {
1164 c = strchr(c+1, '/');
1165 if (c) {
1166 c = strchr(c+1, '/');
1167 if (c)
1168 *c = 0;
1169 }
1170 }
1171
3a19f215 1172 (void) determine_space(s, &available, NULL);
8580d1f7 1173 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1174 if (rl == 0)
d025f1e4 1175 return;
d025f1e4
ZJS
1176
1177 /* Write a suppression message if we suppressed something */
1178 if (rl > 1)
db91ea32 1179 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1180 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1181 NULL);
d025f1e4
ZJS
1182
1183finish:
d07f7b9e 1184 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1185}
1186
d025f1e4 1187int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1188 sd_id128_t machine;
1189 sd_journal *j = NULL;
fbb63411
LP
1190 char ts[FORMAT_TIMESPAN_MAX];
1191 usec_t start;
1192 unsigned n = 0;
1193 int r;
d025f1e4
ZJS
1194
1195 assert(s);
1196
1197 if (s->storage != STORAGE_AUTO &&
1198 s->storage != STORAGE_PERSISTENT)
1199 return 0;
1200
1201 if (!s->runtime_journal)
1202 return 0;
1203
8580d1f7 1204 (void) system_journal_open(s, true);
d025f1e4
ZJS
1205
1206 if (!s->system_journal)
1207 return 0;
1208
1209 log_debug("Flushing to /var...");
1210
fbb63411
LP
1211 start = now(CLOCK_MONOTONIC);
1212
d025f1e4 1213 r = sd_id128_get_machine(&machine);
00a16861 1214 if (r < 0)
d025f1e4 1215 return r;
d025f1e4
ZJS
1216
1217 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1218 if (r < 0)
1219 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1220
93b73b06
LP
1221 sd_journal_set_data_threshold(j, 0);
1222
d025f1e4
ZJS
1223 SD_JOURNAL_FOREACH(j) {
1224 Object *o = NULL;
1225 JournalFile *f;
1226
1227 f = j->current_file;
1228 assert(f && f->current_offset > 0);
1229
fbb63411
LP
1230 n++;
1231
d025f1e4
ZJS
1232 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1233 if (r < 0) {
da927ba9 1234 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1235 goto finish;
1236 }
1237
1238 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1239 if (r >= 0)
1240 continue;
1241
1242 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1243 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1244 goto finish;
1245 }
1246
1247 server_rotate(s);
3a19f215 1248 server_vacuum(s, false);
d025f1e4 1249
253f59df
LP
1250 if (!s->system_journal) {
1251 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1252 r = -EIO;
1253 goto finish;
1254 }
1255
d025f1e4
ZJS
1256 log_debug("Retrying write.");
1257 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1258 if (r < 0) {
da927ba9 1259 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1260 goto finish;
1261 }
1262 }
1263
804ae586
LP
1264 r = 0;
1265
d025f1e4
ZJS
1266finish:
1267 journal_file_post_change(s->system_journal);
1268
804ae586 1269 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1270
1271 if (r >= 0)
c6878637 1272 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1273
763c7aa2 1274 sd_journal_close(j);
d025f1e4 1275
8a03c9ef
ZJS
1276 server_driver_message(s, SD_ID128_NULL,
1277 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1278 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1279 n),
1280 NULL);
fbb63411 1281
d025f1e4
ZJS
1282 return r;
1283}
1284
8531ae70 1285int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1286 Server *s = userdata;
a315ac4e
LP
1287 struct ucred *ucred = NULL;
1288 struct timeval *tv = NULL;
1289 struct cmsghdr *cmsg;
1290 char *label = NULL;
1291 size_t label_len = 0, m;
1292 struct iovec iovec;
1293 ssize_t n;
1294 int *fds = NULL, v = 0;
1295 unsigned n_fds = 0;
1296
1297 union {
1298 struct cmsghdr cmsghdr;
1299
1300 /* We use NAME_MAX space for the SELinux label
1301 * here. The kernel currently enforces no
1302 * limit, but according to suggestions from
1303 * the SELinux people this will change and it
1304 * will probably be identical to NAME_MAX. For
1305 * now we use that, but this should be updated
1306 * one day when the final limit is known. */
1307 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1308 CMSG_SPACE(sizeof(struct timeval)) +
1309 CMSG_SPACE(sizeof(int)) + /* fd */
1310 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1311 } control = {};
1312
1313 union sockaddr_union sa = {};
1314
1315 struct msghdr msghdr = {
1316 .msg_iov = &iovec,
1317 .msg_iovlen = 1,
1318 .msg_control = &control,
1319 .msg_controllen = sizeof(control),
1320 .msg_name = &sa,
1321 .msg_namelen = sizeof(sa),
1322 };
f9a810be 1323
d025f1e4 1324 assert(s);
875c2e22 1325 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1326
1327 if (revents != EPOLLIN) {
1328 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1329 return -EIO;
1330 }
1331
a315ac4e
LP
1332 /* Try to get the right size, if we can. (Not all
1333 * sockets support SIOCINQ, hence we just try, but
1334 * don't rely on it. */
1335 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1336
a315ac4e
LP
1337 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1338 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1339 (size_t) LINE_MAX,
1340 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1341
a315ac4e
LP
1342 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1343 return log_oom();
875c2e22 1344
a315ac4e
LP
1345 iovec.iov_base = s->buffer;
1346 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1347
a315ac4e
LP
1348 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1349 if (n < 0) {
1350 if (errno == EINTR || errno == EAGAIN)
1351 return 0;
875c2e22 1352
a315ac4e
LP
1353 return log_error_errno(errno, "recvmsg() failed: %m");
1354 }
875c2e22 1355
a315ac4e
LP
1356 CMSG_FOREACH(cmsg, &msghdr) {
1357
1358 if (cmsg->cmsg_level == SOL_SOCKET &&
1359 cmsg->cmsg_type == SCM_CREDENTIALS &&
1360 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1361 ucred = (struct ucred*) CMSG_DATA(cmsg);
1362 else if (cmsg->cmsg_level == SOL_SOCKET &&
1363 cmsg->cmsg_type == SCM_SECURITY) {
1364 label = (char*) CMSG_DATA(cmsg);
1365 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1366 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1367 cmsg->cmsg_type == SO_TIMESTAMP &&
1368 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1369 tv = (struct timeval*) CMSG_DATA(cmsg);
1370 else if (cmsg->cmsg_level == SOL_SOCKET &&
1371 cmsg->cmsg_type == SCM_RIGHTS) {
1372 fds = (int*) CMSG_DATA(cmsg);
1373 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1374 }
a315ac4e 1375 }
d025f1e4 1376
a315ac4e
LP
1377 /* And a trailing NUL, just in case */
1378 s->buffer[n] = 0;
1379
1380 if (fd == s->syslog_fd) {
1381 if (n > 0 && n_fds == 0)
1382 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1383 else if (n_fds > 0)
1384 log_warning("Got file descriptors via syslog socket. Ignoring.");
1385
1386 } else if (fd == s->native_fd) {
1387 if (n > 0 && n_fds == 0)
1388 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1389 else if (n == 0 && n_fds == 1)
1390 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1391 else if (n_fds > 0)
1392 log_warning("Got too many file descriptors via native socket. Ignoring.");
1393
1394 } else {
1395 assert(fd == s->audit_fd);
1396
1397 if (n > 0 && n_fds == 0)
1398 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1399 else if (n_fds > 0)
1400 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1401 }
a315ac4e
LP
1402
1403 close_many(fds, n_fds);
1404 return 0;
f9a810be 1405}
d025f1e4 1406
f9a810be
LP
1407static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1408 Server *s = userdata;
33d52ab9 1409 int r;
d025f1e4 1410
f9a810be 1411 assert(s);
d025f1e4 1412
94b65516 1413 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1414
929eeb54 1415 (void) server_flush_to_var(s);
f9a810be 1416 server_sync(s);
3a19f215 1417 server_vacuum(s, false);
d025f1e4 1418
33d52ab9
LP
1419 r = touch("/run/systemd/journal/flushed");
1420 if (r < 0)
1421 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1422
18e758bf 1423 server_space_usage_message(s, NULL);
f9a810be
LP
1424 return 0;
1425}
d025f1e4 1426
f9a810be
LP
1427static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1428 Server *s = userdata;
33d52ab9 1429 int r;
d025f1e4 1430
f9a810be 1431 assert(s);
d025f1e4 1432
94b65516 1433 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1434 server_rotate(s);
3a19f215
FB
1435 server_vacuum(s, true);
1436
1437 if (s->system_journal)
1438 patch_min_use(&s->system_storage);
1439 if (s->runtime_journal)
1440 patch_min_use(&s->runtime_storage);
d025f1e4 1441
dbd6e31c 1442 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1443 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1444 if (r < 0)
1445 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1446
f9a810be
LP
1447 return 0;
1448}
d025f1e4 1449
f9a810be
LP
1450static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1451 Server *s = userdata;
d025f1e4 1452
f9a810be 1453 assert(s);
d025f1e4 1454
4daf54a8 1455 log_received_signal(LOG_INFO, si);
d025f1e4 1456
6203e07a 1457 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1458 return 0;
1459}
1460
94b65516
LP
1461static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1462 Server *s = userdata;
33d52ab9 1463 int r;
94b65516
LP
1464
1465 assert(s);
1466
1467 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1468
1469 server_sync(s);
1470
1471 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1472 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1473 if (r < 0)
1474 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1475
1476 return 0;
1477}
1478
f9a810be 1479static int setup_signals(Server *s) {
f9a810be 1480 int r;
d025f1e4
ZJS
1481
1482 assert(s);
1483
94b65516 1484 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1485
151b9b96 1486 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1487 if (r < 0)
1488 return r;
1489
151b9b96 1490 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1491 if (r < 0)
1492 return r;
d025f1e4 1493
151b9b96 1494 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1495 if (r < 0)
1496 return r;
d025f1e4 1497
b374689c
LP
1498 /* Let's process SIGTERM late, so that we flush all queued
1499 * messages to disk before we exit */
1500 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1501 if (r < 0)
1502 return r;
1503
1504 /* When journald is invoked on the terminal (when debugging),
1505 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1506 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1507 if (r < 0)
1508 return r;
d025f1e4 1509
b374689c
LP
1510 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1511 if (r < 0)
1512 return r;
1513
94b65516
LP
1514 /* SIGRTMIN+1 causes an immediate sync. We process this very
1515 * late, so that everything else queued at this point is
1516 * really written to disk. Clients can watch
1517 * /run/systemd/journal/synced with inotify until its mtime
1518 * changes to see when a sync happened. */
1519 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1520 if (r < 0)
1521 return r;
1522
1523 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1524 if (r < 0)
1525 return r;
1526
d025f1e4
ZJS
1527 return 0;
1528}
1529
1530static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1531 _cleanup_free_ char *line = NULL;
d581d9d9 1532 const char *p;
74df0fca 1533 int r;
d025f1e4 1534
74df0fca 1535 r = proc_cmdline(&line);
b5884878 1536 if (r < 0) {
da927ba9 1537 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1538 return 0;
b5884878 1539 }
d025f1e4 1540
d581d9d9 1541 p = line;
9ed794a3 1542 for (;;) {
ff82c36c 1543 _cleanup_free_ char *word = NULL;
d025f1e4 1544
d581d9d9
SS
1545 r = extract_first_word(&p, &word, NULL, 0);
1546 if (r < 0)
1547 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1548
1549 if (r == 0)
1550 break;
d025f1e4
ZJS
1551
1552 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1553 r = parse_boolean(word + 35);
1554 if (r < 0)
1555 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1556 else
1557 s->forward_to_syslog = r;
1558 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1559 r = parse_boolean(word + 33);
1560 if (r < 0)
1561 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1562 else
1563 s->forward_to_kmsg = r;
1564 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1565 r = parse_boolean(word + 36);
1566 if (r < 0)
1567 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1568 else
1569 s->forward_to_console = r;
40b71e89
ST
1570 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1571 r = parse_boolean(word + 33);
1572 if (r < 0)
1573 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1574 else
1575 s->forward_to_wall = r;
d025f1e4
ZJS
1576 } else if (startswith(word, "systemd.journald"))
1577 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1578 }
1579
804ae586 1580 /* do not warn about state here, since probably systemd already did */
db91ea32 1581 return 0;
d025f1e4
ZJS
1582}
1583
1584static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1585 assert(s);
1586
43688c49 1587 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1588 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1589 "Journal\0",
1590 config_item_perf_lookup, journald_gperf_lookup,
1591 false, s);
d025f1e4
ZJS
1592}
1593
f9a810be
LP
1594static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1595 Server *s = userdata;
26687bf8
OS
1596
1597 assert(s);
1598
f9a810be 1599 server_sync(s);
26687bf8
OS
1600 return 0;
1601}
1602
d07f7b9e 1603int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1604 int r;
1605
26687bf8
OS
1606 assert(s);
1607
d07f7b9e
LP
1608 if (priority <= LOG_CRIT) {
1609 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1610 server_sync(s);
1611 return 0;
1612 }
1613
26687bf8
OS
1614 if (s->sync_scheduled)
1615 return 0;
1616
f9a810be
LP
1617 if (s->sync_interval_usec > 0) {
1618 usec_t when;
ca267016 1619
6a0f1f6d 1620 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1621 if (r < 0)
1622 return r;
26687bf8 1623
f9a810be
LP
1624 when += s->sync_interval_usec;
1625
1626 if (!s->sync_event_source) {
6a0f1f6d
LP
1627 r = sd_event_add_time(
1628 s->event,
1629 &s->sync_event_source,
1630 CLOCK_MONOTONIC,
1631 when, 0,
1632 server_dispatch_sync, s);
f9a810be
LP
1633 if (r < 0)
1634 return r;
1635
1636 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1637 } else {
1638 r = sd_event_source_set_time(s->sync_event_source, when);
1639 if (r < 0)
1640 return r;
1641
1642 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1643 }
26687bf8 1644 if (r < 0)
f9a810be 1645 return r;
26687bf8 1646
f9a810be
LP
1647 s->sync_scheduled = true;
1648 }
26687bf8
OS
1649
1650 return 0;
1651}
1652
0c24bb23
LP
1653static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1654 Server *s = userdata;
1655
1656 assert(s);
1657
1658 server_cache_hostname(s);
1659 return 0;
1660}
1661
1662static int server_open_hostname(Server *s) {
1663 int r;
1664
1665 assert(s);
1666
1667 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1668 if (s->hostname_fd < 0)
1669 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1670
151b9b96 1671 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1672 if (r < 0) {
28def94c
DR
1673 /* kernels prior to 3.2 don't support polling this file. Ignore
1674 * the failure. */
1675 if (r == -EPERM) {
e53fc357 1676 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1677 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1678 return 0;
1679 }
1680
23bbb0de 1681 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1682 }
1683
1684 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1685 if (r < 0)
1686 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1687
1688 return 0;
1689}
1690
e22aa3d3
LP
1691static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1692 Server *s = userdata;
1693 int r;
1694
1695 assert(s);
1696 assert(s->notify_event_source == es);
1697 assert(s->notify_fd == fd);
1698
e22aa3d3 1699 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1700 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1701 * READY=1 event or an stdout stream event. If there's nothing
1702 * to write anymore, turn our event source off. The next time
1703 * there's something to send it will be turned on again. */
e22aa3d3
LP
1704
1705 if (!s->sent_notify_ready) {
1706 static const char p[] =
1707 "READY=1\n"
1708 "STATUS=Processing requests...";
1709 ssize_t l;
1710
1711 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1712 if (l < 0) {
1713 if (errno == EAGAIN)
1714 return 0;
1715
1716 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1717 }
1718
1719 s->sent_notify_ready = true;
1720 log_debug("Sent READY=1 notification.");
1721
119e9655
LP
1722 } else if (s->send_watchdog) {
1723
1724 static const char p[] =
1725 "WATCHDOG=1";
1726
1727 ssize_t l;
1728
1729 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1730 if (l < 0) {
1731 if (errno == EAGAIN)
1732 return 0;
1733
1734 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1735 }
1736
1737 s->send_watchdog = false;
1738 log_debug("Sent WATCHDOG=1 notification.");
1739
e22aa3d3
LP
1740 } else if (s->stdout_streams_notify_queue)
1741 /* Dispatch one stream notification event */
1742 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1743
61233823 1744 /* Leave us enabled if there's still more to do. */
119e9655 1745 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1746 return 0;
1747
1748 /* There was nothing to do anymore, let's turn ourselves off. */
1749 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1750 if (r < 0)
1751 return log_error_errno(r, "Failed to turn off notify event source: %m");
1752
1753 return 0;
1754}
1755
119e9655
LP
1756static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1757 Server *s = userdata;
1758 int r;
1759
1760 assert(s);
1761
1762 s->send_watchdog = true;
1763
1764 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1765 if (r < 0)
1766 log_warning_errno(r, "Failed to turn on notify event source: %m");
1767
1768 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1769 if (r < 0)
1770 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1771
1772 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1773 if (r < 0)
1774 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1775
1776 return 0;
1777}
1778
e22aa3d3
LP
1779static int server_connect_notify(Server *s) {
1780 union sockaddr_union sa = {
1781 .un.sun_family = AF_UNIX,
1782 };
1783 const char *e;
1784 int r;
1785
1786 assert(s);
1787 assert(s->notify_fd < 0);
1788 assert(!s->notify_event_source);
1789
1790 /*
1791 So here's the problem: we'd like to send notification
1792 messages to PID 1, but we cannot do that via sd_notify(),
1793 since that's synchronous, and we might end up blocking on
1794 it. Specifically: given that PID 1 might block on
1795 dbus-daemon during IPC, and dbus-daemon is logging to us,
1796 and might hence block on us, we might end up in a deadlock
ccddd104 1797 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1798 generating a full blocking circle. To avoid this, let's
1799 create a non-blocking socket, and connect it to the
1800 notification socket, and then wait for POLLOUT before we
1801 send anything. This should efficiently avoid any deadlocks,
1802 as we'll never block on PID 1, hence PID 1 can safely block
1803 on dbus-daemon which can safely block on us again.
1804
1805 Don't think that this issue is real? It is, see:
1806 https://github.com/systemd/systemd/issues/1505
1807 */
1808
1809 e = getenv("NOTIFY_SOCKET");
1810 if (!e)
1811 return 0;
1812
1813 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1814 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1815 return -EINVAL;
1816 }
1817
1818 if (strlen(e) > sizeof(sa.un.sun_path)) {
1819 log_error("NOTIFY_SOCKET path too long: %s", e);
1820 return -EINVAL;
1821 }
1822
1823 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1824 if (s->notify_fd < 0)
1825 return log_error_errno(errno, "Failed to create notify socket: %m");
1826
1827 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1828
1829 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1830 if (sa.un.sun_path[0] == '@')
1831 sa.un.sun_path[0] = 0;
1832
fc2fffe7 1833 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1834 if (r < 0)
1835 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1836
1837 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1838 if (r < 0)
1839 return log_error_errno(r, "Failed to watch notification socket: %m");
1840
119e9655
LP
1841 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1842 s->send_watchdog = true;
1843
4de2402b 1844 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1845 if (r < 0)
1846 return log_error_errno(r, "Failed to add watchdog time event: %m");
1847 }
1848
e22aa3d3
LP
1849 /* This should fire pretty soon, which we'll use to send the
1850 * READY=1 event. */
1851
1852 return 0;
1853}
1854
d025f1e4 1855int server_init(Server *s) {
13790add 1856 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1857 int n, r, fd;
7d18d348 1858 bool no_sockets;
d025f1e4
ZJS
1859
1860 assert(s);
1861
1862 zero(*s);
e22aa3d3 1863 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1864 s->compress = true;
1865 s->seal = true;
1866
119e9655
LP
1867 s->watchdog_usec = USEC_INFINITY;
1868
26687bf8
OS
1869 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1870 s->sync_scheduled = false;
1871
d025f1e4
ZJS
1872 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1873 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1874
40b71e89 1875 s->forward_to_wall = true;
d025f1e4 1876
e150e820
MB
1877 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1878
d025f1e4
ZJS
1879 s->max_level_store = LOG_DEBUG;
1880 s->max_level_syslog = LOG_DEBUG;
1881 s->max_level_kmsg = LOG_NOTICE;
1882 s->max_level_console = LOG_INFO;
40b71e89 1883 s->max_level_wall = LOG_EMERG;
d025f1e4 1884
266a4700
FB
1885 journal_reset_metrics(&s->system_storage.metrics);
1886 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1887
1888 server_parse_config_file(s);
1889 server_parse_proc_cmdline(s);
8580d1f7 1890
d288f79f 1891 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1892 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1893 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1894 s->rate_limit_interval = s->rate_limit_burst = 0;
1895 }
d025f1e4 1896
8580d1f7 1897 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1898
43cf8388 1899 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1900 if (!s->user_journals)
1901 return log_oom();
1902
1903 s->mmap = mmap_cache_new();
1904 if (!s->mmap)
1905 return log_oom();
1906
b58c888f
VC
1907 s->deferred_closes = set_new(NULL);
1908 if (!s->deferred_closes)
1909 return log_oom();
1910
f9a810be 1911 r = sd_event_default(&s->event);
23bbb0de
MS
1912 if (r < 0)
1913 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1914
1915 n = sd_listen_fds(true);
23bbb0de
MS
1916 if (n < 0)
1917 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1918
1919 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1920
1921 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1922
1923 if (s->native_fd >= 0) {
1924 log_error("Too many native sockets passed.");
1925 return -EINVAL;
1926 }
1927
1928 s->native_fd = fd;
1929
1930 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1931
1932 if (s->stdout_fd >= 0) {
1933 log_error("Too many stdout sockets passed.");
1934 return -EINVAL;
1935 }
1936
1937 s->stdout_fd = fd;
1938
03ee5c38
LP
1939 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1940 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1941
1942 if (s->syslog_fd >= 0) {
1943 log_error("Too many /dev/log sockets passed.");
1944 return -EINVAL;
1945 }
1946
1947 s->syslog_fd = fd;
1948
875c2e22
LP
1949 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1950
1951 if (s->audit_fd >= 0) {
1952 log_error("Too many audit sockets passed.");
1953 return -EINVAL;
1954 }
1955
1956 s->audit_fd = fd;
1957
4ec3cd73 1958 } else {
4ec3cd73 1959
13790add
LP
1960 if (!fds) {
1961 fds = fdset_new();
1962 if (!fds)
1963 return log_oom();
1964 }
4ec3cd73 1965
13790add
LP
1966 r = fdset_put(fds, fd);
1967 if (r < 0)
1968 return log_oom();
4ec3cd73 1969 }
d025f1e4
ZJS
1970 }
1971
15d91bff
ZJS
1972 /* Try to restore streams, but don't bother if this fails */
1973 (void) server_restore_streams(s, fds);
d025f1e4 1974
13790add
LP
1975 if (fdset_size(fds) > 0) {
1976 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1977 fds = fdset_free(fds);
1978 }
1979
7d18d348
ZJS
1980 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1981
1982 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1983
1984 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1985 r = server_open_stdout_socket(s);
1986 if (r < 0)
1987 return r;
1988
37b7affe 1989 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1990 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1991 if (r < 0)
1992 return r;
1993
37b7affe 1994 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1995 r = server_open_native_socket(s);
d025f1e4
ZJS
1996 if (r < 0)
1997 return r;
1998
37b7affe 1999 /* /dev/ksmg */
d025f1e4
ZJS
2000 r = server_open_dev_kmsg(s);
2001 if (r < 0)
2002 return r;
2003
7d18d348
ZJS
2004 /* Unless we got *some* sockets and not audit, open audit socket */
2005 if (s->audit_fd >= 0 || no_sockets) {
2006 r = server_open_audit(s);
2007 if (r < 0)
2008 return r;
2009 }
875c2e22 2010
d025f1e4
ZJS
2011 r = server_open_kernel_seqnum(s);
2012 if (r < 0)
2013 return r;
2014
0c24bb23
LP
2015 r = server_open_hostname(s);
2016 if (r < 0)
2017 return r;
2018
f9a810be 2019 r = setup_signals(s);
d025f1e4
ZJS
2020 if (r < 0)
2021 return r;
2022
2023 s->udev = udev_new();
2024 if (!s->udev)
2025 return -ENOMEM;
2026
f9a810be 2027 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2028 if (!s->rate_limit)
2029 return -ENOMEM;
2030
e9174f29
LP
2031 r = cg_get_root_path(&s->cgroup_root);
2032 if (r < 0)
2033 return r;
2034
0c24bb23
LP
2035 server_cache_hostname(s);
2036 server_cache_boot_id(s);
2037 server_cache_machine_id(s);
2038
266a4700
FB
2039 s->runtime_storage.name = "Runtime journal";
2040 s->system_storage.name = "System journal";
2041
2042 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL);
2043 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL);
2044 if (!s->runtime_storage.path || !s->system_storage.path)
2045 return -ENOMEM;
2046
e22aa3d3
LP
2047 (void) server_connect_notify(s);
2048
804ae586 2049 return system_journal_open(s, false);
d025f1e4
ZJS
2050}
2051
2052void server_maybe_append_tags(Server *s) {
2053#ifdef HAVE_GCRYPT
2054 JournalFile *f;
2055 Iterator i;
2056 usec_t n;
2057
2058 n = now(CLOCK_REALTIME);
2059
2060 if (s->system_journal)
2061 journal_file_maybe_append_tag(s->system_journal, n);
2062
43cf8388 2063 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2064 journal_file_maybe_append_tag(f, n);
2065#endif
2066}
2067
2068void server_done(Server *s) {
2069 JournalFile *f;
2070 assert(s);
2071
b58c888f
VC
2072 if (s->deferred_closes) {
2073 journal_file_close_set(s->deferred_closes);
2074 set_free(s->deferred_closes);
2075 }
2076
d025f1e4
ZJS
2077 while (s->stdout_streams)
2078 stdout_stream_free(s->stdout_streams);
2079
2080 if (s->system_journal)
69a3a6fd 2081 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2082
2083 if (s->runtime_journal)
69a3a6fd 2084 (void) journal_file_close(s->runtime_journal);
d025f1e4 2085
43cf8388 2086 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2087 (void) journal_file_close(f);
d025f1e4 2088
43cf8388 2089 ordered_hashmap_free(s->user_journals);
d025f1e4 2090
f9a810be
LP
2091 sd_event_source_unref(s->syslog_event_source);
2092 sd_event_source_unref(s->native_event_source);
2093 sd_event_source_unref(s->stdout_event_source);
2094 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2095 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2096 sd_event_source_unref(s->sync_event_source);
2097 sd_event_source_unref(s->sigusr1_event_source);
2098 sd_event_source_unref(s->sigusr2_event_source);
2099 sd_event_source_unref(s->sigterm_event_source);
2100 sd_event_source_unref(s->sigint_event_source);
94b65516 2101 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2102 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2103 sd_event_source_unref(s->notify_event_source);
119e9655 2104 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2105 sd_event_unref(s->event);
d025f1e4 2106
03e334a1
LP
2107 safe_close(s->syslog_fd);
2108 safe_close(s->native_fd);
2109 safe_close(s->stdout_fd);
2110 safe_close(s->dev_kmsg_fd);
875c2e22 2111 safe_close(s->audit_fd);
03e334a1 2112 safe_close(s->hostname_fd);
e22aa3d3 2113 safe_close(s->notify_fd);
0c24bb23 2114
d025f1e4
ZJS
2115 if (s->rate_limit)
2116 journal_rate_limit_free(s->rate_limit);
2117
2118 if (s->kernel_seqnum)
2119 munmap(s->kernel_seqnum, sizeof(uint64_t));
2120
2121 free(s->buffer);
2122 free(s->tty_path);
e9174f29 2123 free(s->cgroup_root);
99d0966e 2124 free(s->hostname_field);
d025f1e4
ZJS
2125
2126 if (s->mmap)
2127 mmap_cache_unref(s->mmap);
2128
3e044c49 2129 udev_unref(s->udev);
d025f1e4 2130}
8580d1f7
LP
2131
2132static const char* const storage_table[_STORAGE_MAX] = {
2133 [STORAGE_AUTO] = "auto",
2134 [STORAGE_VOLATILE] = "volatile",
2135 [STORAGE_PERSISTENT] = "persistent",
2136 [STORAGE_NONE] = "none"
2137};
2138
2139DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2140DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2141
2142static const char* const split_mode_table[_SPLIT_MAX] = {
2143 [SPLIT_LOGIN] = "login",
2144 [SPLIT_UID] = "uid",
2145 [SPLIT_NONE] = "none",
2146};
2147
2148DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2149DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");