]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
build-sys: use #if Y instead of #ifdef Y everywhere
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
349cc4a5 20#if HAVE_SELINUX
24882e06
LP
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
22e3a02b 54#include "journald-context.h"
d025f1e4 55#include "journald-kmsg.h"
d025f1e4 56#include "journald-native.h"
8580d1f7 57#include "journald-rate-limit.h"
3ffd4af2 58#include "journald-server.h"
8580d1f7
LP
59#include "journald-stream.h"
60#include "journald-syslog.h"
4b58153d 61#include "log.h"
07630cea
LP
62#include "missing.h"
63#include "mkdir.h"
6bedfcbb 64#include "parse-util.h"
4e731273 65#include "proc-cmdline.h"
07630cea
LP
66#include "process-util.h"
67#include "rm-rf.h"
68#include "selinux-util.h"
69#include "signal-util.h"
70#include "socket-util.h"
32917e33 71#include "stdio-util.h"
8b43440b 72#include "string-table.h"
07630cea 73#include "string-util.h"
863a5610 74#include "syslog-util.h"
22e3a02b 75#include "user-util.h"
d025f1e4 76
d025f1e4
ZJS
77#define USER_JOURNALS_MAX 1024
78
26687bf8 79#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
80#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
81#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 82#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 83
8580d1f7 84#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 85
e22aa3d3
LP
86#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87
7a24f3bf
VC
88/* The period to insert between posting changes for coalescing */
89#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90
ec20fe5f
LP
91/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
92 * for a bit of additional metadata. */
93#define DEFAULT_LINE_MAX (48*1024)
94
e0ed6db9
FB
95static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
96 _cleanup_closedir_ DIR *d = NULL;
97 struct dirent *de;
98 struct statvfs ss;
e0ed6db9
FB
99
100 assert(ret_used);
101 assert(ret_free);
102
266a4700 103 d = opendir(path);
e0ed6db9
FB
104 if (!d)
105 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 106 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
107
108 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 109 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
110
111 *ret_free = ss.f_bsize * ss.f_bavail;
112 *ret_used = 0;
113 FOREACH_DIRENT_ALL(de, d, break) {
114 struct stat st;
115
116 if (!endswith(de->d_name, ".journal") &&
117 !endswith(de->d_name, ".journal~"))
118 continue;
119
120 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 121 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
122 continue;
123 }
124
125 if (!S_ISREG(st.st_mode))
126 continue;
127
128 *ret_used += (uint64_t) st.st_blocks * 512UL;
129 }
130
131 return 0;
132}
133
a0edc477
FB
134static void cache_space_invalidate(JournalStorageSpace *space) {
135 memset(space, 0, sizeof(*space));
136}
137
57f443a6 138static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 139 JournalStorageSpace *space;
266a4700 140 JournalMetrics *metrics;
23aba343 141 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 142 usec_t ts;
e0ed6db9 143 int r;
d025f1e4 144
8580d1f7 145 assert(s);
266a4700 146
266a4700 147 metrics = &storage->metrics;
23aba343 148 space = &storage->space;
d025f1e4 149
8580d1f7 150 ts = now(CLOCK_MONOTONIC);
d025f1e4 151
3099caf2 152 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
153 return 0;
154
23aba343 155 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
156 if (r < 0)
157 return r;
d025f1e4 158
23aba343
FB
159 space->vfs_used = vfs_used;
160 space->vfs_available = vfs_avail;
161
162 avail = LESS_BY(vfs_avail, metrics->keep_free);
163
23aba343
FB
164 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
165 space->available = LESS_BY(space->limit, vfs_used);
166 space->timestamp = ts;
8580d1f7
LP
167 return 1;
168}
169
3a19f215
FB
170static void patch_min_use(JournalStorage *storage) {
171 assert(storage);
172
173 /* Let's bump the min_use limit to the current usage on disk. We do
174 * this when starting up and first opening the journal files. This way
175 * sudden spikes in disk usage will not cause journald to vacuum files
176 * without bounds. Note that this means that only a restart of journald
177 * will make it reset this value. */
178
179 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
180}
181
182
183static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 184 JournalStorage *js;
57f443a6 185 int r;
8580d1f7
LP
186
187 assert(s);
188
266a4700 189 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
190
191 r = cache_space_refresh(s, js);
192 if (r >= 0) {
193 if (available)
194 *available = js->space.available;
195 if (limit)
196 *limit = js->space.limit;
197 }
198 return r;
d025f1e4
ZJS
199}
200
cba5629e
FB
201void server_space_usage_message(Server *s, JournalStorage *storage) {
202 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
203 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
204 JournalMetrics *metrics;
cba5629e
FB
205
206 assert(s);
207
208 if (!storage)
209 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
210
57f443a6 211 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
212 return;
213
214 metrics = &storage->metrics;
23aba343 215 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
216 format_bytes(fb2, sizeof(fb2), metrics->max_use);
217 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 218 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
219 format_bytes(fb5, sizeof(fb5), storage->space.limit);
220 format_bytes(fb6, sizeof(fb6), storage->space.available);
221
2b044526 222 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
223 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
224 storage->name, storage->path, fb1, fb5, fb6),
225 "JOURNAL_NAME=%s", storage->name,
226 "JOURNAL_PATH=%s", storage->path,
23aba343 227 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
228 "CURRENT_USE_PRETTY=%s", fb1,
229 "MAX_USE=%"PRIu64, metrics->max_use,
230 "MAX_USE_PRETTY=%s", fb2,
231 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
232 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 233 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
234 "DISK_AVAILABLE_PRETTY=%s", fb4,
235 "LIMIT=%"PRIu64, storage->space.limit,
236 "LIMIT_PRETTY=%s", fb5,
237 "AVAILABLE=%"PRIu64, storage->space.available,
238 "AVAILABLE_PRETTY=%s", fb6,
239 NULL);
240}
241
5c3bde3f 242static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 243#if HAVE_ACL
5c3bde3f 244 int r;
d025f1e4 245#endif
d025f1e4
ZJS
246 assert(f);
247
349cc4a5 248#if HAVE_ACL
34c10968 249 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
250 return;
251
5c3bde3f
ZJS
252 r = add_acls_for_user(f->fd, uid);
253 if (r < 0)
254 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
255#endif
256}
257
7a24f3bf
VC
258static int open_journal(
259 Server *s,
260 bool reliably,
261 const char *fname,
262 int flags,
263 bool seal,
264 JournalMetrics *metrics,
7a24f3bf
VC
265 JournalFile **ret) {
266 int r;
e167d7fd 267 JournalFile *f;
7a24f3bf
VC
268
269 assert(s);
270 assert(fname);
271 assert(ret);
272
273 if (reliably)
b58c888f 274 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 275 else
5d1ce257 276 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
277 if (r < 0)
278 return r;
279
e167d7fd 280 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 281 if (r < 0) {
69a3a6fd 282 (void) journal_file_close(f);
7a24f3bf
VC
283 return r;
284 }
285
e167d7fd 286 *ret = f;
7a24f3bf
VC
287 return r;
288}
289
/* Returns true if the flag file /run/systemd/journal/flushed exists, as determined by access(F_OK). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
293
105bdb46
VC
294static int system_journal_open(Server *s, bool flush_requested) {
295 const char *fn;
296 int r = 0;
297
298 if (!s->system_journal &&
f78273c8
LP
299 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
300 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
301
302 /* If in auto mode: first try to create the machine
303 * path, but not the prefix.
304 *
305 * If in persistent mode: create /var/log/journal and
306 * the machine path */
307
308 if (s->storage == STORAGE_PERSISTENT)
309 (void) mkdir_p("/var/log/journal/", 0755);
310
266a4700 311 (void) mkdir(s->system_storage.path, 0755);
105bdb46 312
266a4700
FB
313 fn = strjoina(s->system_storage.path, "/system.journal");
314 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
315 if (r >= 0) {
316 server_add_acls(s->system_journal, 0);
57f443a6 317 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 318 patch_min_use(&s->system_storage);
105bdb46
VC
319 } else if (r < 0) {
320 if (r != -ENOENT && r != -EROFS)
321 log_warning_errno(r, "Failed to open system journal: %m");
322
323 r = 0;
324 }
929eeb54
VC
325
326 /* If the runtime journal is open, and we're post-flush, we're
327 * recovering from a failed system journal rotate (ENOSPC)
328 * for which the runtime journal was reopened.
329 *
330 * Perform an implicit flush to var, leaving the runtime
331 * journal closed, now that the system journal is back.
332 */
f78273c8
LP
333 if (!flush_requested)
334 (void) server_flush_to_var(s, true);
105bdb46
VC
335 }
336
337 if (!s->runtime_journal &&
338 (s->storage != STORAGE_NONE)) {
339
266a4700 340 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
341
342 if (s->system_journal) {
343
344 /* Try to open the runtime journal, but only
345 * if it already exists, so that we can flush
346 * it into the system journal */
347
266a4700 348 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
349 if (r < 0) {
350 if (r != -ENOENT)
351 log_warning_errno(r, "Failed to open runtime journal: %m");
352
353 r = 0;
354 }
355
356 } else {
357
358 /* OK, we really need the runtime journal, so create
359 * it if necessary. */
360
361 (void) mkdir("/run/log", 0755);
362 (void) mkdir("/run/log/journal", 0755);
363 (void) mkdir_parents(fn, 0750);
364
266a4700 365 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
366 if (r < 0)
367 return log_error_errno(r, "Failed to open runtime journal: %m");
368 }
369
370 if (s->runtime_journal) {
371 server_add_acls(s->runtime_journal, 0);
57f443a6 372 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 373 patch_min_use(&s->runtime_storage);
105bdb46
VC
374 }
375 }
376
377 return r;
378}
379
d025f1e4 380static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 381 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
382 int r;
383 JournalFile *f;
384 sd_id128_t machine;
385
386 assert(s);
387
105bdb46
VC
388 /* A rotate that fails to create the new journal (ENOSPC) leaves the
389 * rotated journal as NULL. Unless we revisit opening, even after
390 * space is made available we'll continue to return NULL indefinitely.
391 *
392 * system_journal_open() is a noop if the journals are already open, so
393 * we can just call it here to recover from failed rotates (or anything
394 * else that's left the journals as NULL).
395 *
396 * Fixes https://github.com/systemd/systemd/issues/3968 */
397 (void) system_journal_open(s, false);
398
d025f1e4
ZJS
399 /* We split up user logs only on /var, not on /run. If the
400 * runtime file is open, we write to it exclusively, in order
401 * to guarantee proper order as soon as we flush /run to
402 * /var and close the runtime file. */
403
404 if (s->runtime_journal)
405 return s->runtime_journal;
406
61755fda 407 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
408 return s->system_journal;
409
410 r = sd_id128_get_machine(&machine);
411 if (r < 0)
412 return s->system_journal;
413
4a0b58c4 414 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
415 if (f)
416 return f;
417
de0671ee
ZJS
418 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
419 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
420 return s->system_journal;
421
43cf8388 422 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 423 /* Too many open? Then let's close one */
43cf8388 424 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 425 assert(f);
69a3a6fd 426 (void) journal_file_close(f);
d025f1e4
ZJS
427 }
428
266a4700 429 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
430 if (r < 0)
431 return s->system_journal;
432
5c3bde3f 433 server_add_acls(f, uid);
d025f1e4 434
4a0b58c4 435 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 436 if (r < 0) {
69a3a6fd 437 (void) journal_file_close(f);
d025f1e4
ZJS
438 return s->system_journal;
439 }
440
441 return f;
442}
443
ea69bd41
LP
444static int do_rotate(
445 Server *s,
446 JournalFile **f,
447 const char* name,
448 bool seal,
449 uint32_t uid) {
450
fc55baee
ZJS
451 int r;
452 assert(s);
453
454 if (!*f)
455 return -EINVAL;
456
b58c888f 457 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
458 if (r < 0)
459 if (*f)
ea69bd41 460 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 461 else
ea69bd41 462 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 463 else
5c3bde3f 464 server_add_acls(*f, uid);
2678031a 465
fc55baee
ZJS
466 return r;
467}
468
d025f1e4
ZJS
469void server_rotate(Server *s) {
470 JournalFile *f;
471 void *k;
472 Iterator i;
473 int r;
474
475 log_debug("Rotating...");
476
8580d1f7
LP
477 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
478 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 479
43cf8388 480 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 481 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 482 if (r >= 0)
43cf8388 483 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
484 else if (!f)
485 /* Old file has been closed and deallocated */
43cf8388 486 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 487 }
b58c888f
VC
488
489 /* Perform any deferred closes which aren't still offlining. */
490 SET_FOREACH(f, s->deferred_closes, i)
491 if (!journal_file_is_offlining(f)) {
492 (void) set_remove(s->deferred_closes, f);
493 (void) journal_file_close(f);
494 }
d025f1e4
ZJS
495}
496
26687bf8
OS
497void server_sync(Server *s) {
498 JournalFile *f;
26687bf8
OS
499 Iterator i;
500 int r;
501
26687bf8 502 if (s->system_journal) {
ac2e41f5 503 r = journal_file_set_offline(s->system_journal, false);
26687bf8 504 if (r < 0)
65089b82 505 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
506 }
507
65c1d46b 508 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 509 r = journal_file_set_offline(f, false);
26687bf8 510 if (r < 0)
65089b82 511 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
512 }
513
f9a810be
LP
514 if (s->sync_event_source) {
515 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
516 if (r < 0)
da927ba9 517 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 518 }
26687bf8
OS
519
520 s->sync_scheduled = false;
521}
522
3a19f215 523static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 524
63c8666b
ZJS
525 int r;
526
8580d1f7 527 assert(s);
266a4700 528 assert(storage);
8580d1f7 529
57f443a6 530 (void) cache_space_refresh(s, storage);
18e758bf
FB
531
532 if (verbose)
533 server_space_usage_message(s, storage);
8580d1f7 534
57f443a6
FB
535 r = journal_directory_vacuum(storage->path, storage->space.limit,
536 storage->metrics.n_max_files, s->max_retention_usec,
537 &s->oldest_file_usec, verbose);
63c8666b 538 if (r < 0 && r != -ENOENT)
266a4700
FB
539 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
540
a0edc477 541 cache_space_invalidate(&storage->space);
63c8666b
ZJS
542}
543
3a19f215 544int server_vacuum(Server *s, bool verbose) {
8580d1f7 545 assert(s);
d025f1e4
ZJS
546
547 log_debug("Vacuuming...");
548
549 s->oldest_file_usec = 0;
550
266a4700 551 if (s->system_journal)
3a19f215 552 do_vacuum(s, &s->system_storage, verbose);
266a4700 553 if (s->runtime_journal)
3a19f215 554 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 555
8580d1f7 556 return 0;
d025f1e4
ZJS
557}
558
0c24bb23
LP
559static void server_cache_machine_id(Server *s) {
560 sd_id128_t id;
561 int r;
562
563 assert(s);
564
565 r = sd_id128_get_machine(&id);
566 if (r < 0)
567 return;
568
569 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
570}
571
572static void server_cache_boot_id(Server *s) {
573 sd_id128_t id;
574 int r;
575
576 assert(s);
577
578 r = sd_id128_get_boot(&id);
579 if (r < 0)
580 return;
581
582 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
583}
584
585static void server_cache_hostname(Server *s) {
586 _cleanup_free_ char *t = NULL;
587 char *x;
588
589 assert(s);
590
591 t = gethostname_malloc();
592 if (!t)
593 return;
594
595 x = strappend("_HOSTNAME=", t);
596 if (!x)
597 return;
598
599 free(s->hostname_field);
600 s->hostname_field = x;
601}
602
8531ae70 603static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 604 switch(r) {
ae739cc1 605
6e1045e5
ZJS
606 case -E2BIG: /* Hit configured limit */
607 case -EFBIG: /* Hit fs limit */
608 case -EDQUOT: /* Quota limit hit */
609 case -ENOSPC: /* Disk full */
d025f1e4 610 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 611 return true;
ae739cc1 612
6e1045e5
ZJS
613 case -EIO: /* I/O error of some kind (mmap) */
614 log_warning("%s: IO error, rotating.", f->path);
615 return true;
ae739cc1 616
6e1045e5 617 case -EHOSTDOWN: /* Other machine */
d025f1e4 618 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 619 return true;
ae739cc1 620
6e1045e5 621 case -EBUSY: /* Unclean shutdown */
d025f1e4 622 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 623 return true;
ae739cc1 624
6e1045e5 625 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 626 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 627 return true;
ae739cc1 628
6e1045e5
ZJS
629 case -EBADMSG: /* Corrupted */
630 case -ENODATA: /* Truncated */
631 case -ESHUTDOWN: /* Already archived */
d025f1e4 632 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1 634
6e1045e5 635 case -EIDRM: /* Journal file has been deleted */
2678031a 636 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 637 return true;
ae739cc1
LP
638
639 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 640 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
641 return true;
642
6e1045e5 643 default:
d025f1e4 644 return false;
6e1045e5 645 }
d025f1e4
ZJS
646}
647
d07f7b9e 648static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 649 bool vacuumed = false, rotate = false;
0f972d66 650 struct dual_timestamp ts;
d025f1e4 651 JournalFile *f;
d025f1e4
ZJS
652 int r;
653
654 assert(s);
655 assert(iovec);
656 assert(n > 0);
657
0f972d66
LP
658 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
659 * the source time, and not even the time the event was originally seen, but instead simply the time we started
660 * processing it, as we want strictly linear ordering in what we write out.) */
661 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
662 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
663
7c070017
LP
664 if (ts.realtime < s->last_realtime_clock) {
665 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
666 * regular operation. However, when it does happen, then we should make sure that we start fresh files
667 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
668 * bisection works correctly. */
d025f1e4 669
7c070017
LP
670 log_debug("Time jumped backwards, rotating.");
671 rotate = true;
672 } else {
673
674 f = find_journal(s, uid);
675 if (!f)
676 return;
677
678 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
679 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
680 rotate = true;
681 }
682 }
d025f1e4 683
7c070017 684 if (rotate) {
d025f1e4 685 server_rotate(s);
3a19f215 686 server_vacuum(s, false);
d025f1e4
ZJS
687 vacuumed = true;
688
689 f = find_journal(s, uid);
690 if (!f)
691 return;
692 }
693
7c070017
LP
694 s->last_realtime_clock = ts.realtime;
695
0f972d66 696 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 697 if (r >= 0) {
d07f7b9e 698 server_schedule_sync(s, priority);
d025f1e4 699 return;
26687bf8 700 }
d025f1e4
ZJS
701
702 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 703 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
704 return;
705 }
706
707 server_rotate(s);
3a19f215 708 server_vacuum(s, false);
d025f1e4
ZJS
709
710 f = find_journal(s, uid);
711 if (!f)
712 return;
713
714 log_debug("Retrying write.");
0f972d66 715 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
716 if (r < 0)
717 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
718 else
d07f7b9e 719 server_schedule_sync(s, priority);
d025f1e4
ZJS
720}
721
22e3a02b
LP
/* Helpers that append one "FIELD=value" string to iovec[] and advance n, but only if the value is set. The
 * string buffer is obtained with newa()/strjoina(). Each macro is wrapped in do { } while (false) so that it
 * expands to exactly one statement and is safe inside unbraced if/else; invoke with a trailing semicolon.
 * Note that 'value' is evaluated more than once. */

/* Numeric value, formatted with 'format'; appended only if isset(value) is true */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* NUL terminated string value; appended only if not empty */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* 128bit ID value; appended only if not the null ID */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

/* Value given as pointer plus size (not necessarily NUL terminated); appended only if the size is non-zero */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if (value_size > 0) {                                   \
                        char *k;                                        \
                        k = newa(char, strlen(field "=") + value_size + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (false)

d025f1e4
ZJS
753static void dispatch_message_real(
754 Server *s,
755 struct iovec *iovec, unsigned n, unsigned m,
22e3a02b 756 const ClientContext *c,
3b3154df 757 const struct timeval *tv,
d07f7b9e 758 int priority,
22e3a02b
LP
759 pid_t object_pid) {
760
761 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
762 uid_t journal_uid;
763 ClientContext *o;
d025f1e4
ZJS
764
765 assert(s);
766 assert(iovec);
767 assert(n > 0);
22e3a02b 768 assert(n + N_IOVEC_META_FIELDS + (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
19cace37 769
22e3a02b
LP
770 if (c) {
771 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
772 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
773 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 774
22e3a02b
LP
775 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
776 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
777 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
778 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 779
22e3a02b 780 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 781
22e3a02b
LP
782 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 784
22e3a02b
LP
785 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
786 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
787 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
788 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
789 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
790 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
791 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 792
22e3a02b 793 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d025f1e4 794 }
968f3196 795
22e3a02b 796 assert(n <= m);
968f3196 797
22e3a02b 798 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 799
22e3a02b
LP
800 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
801 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
802 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 803
22e3a02b
LP
804 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
805 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
806 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
807 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 808
22e3a02b 809 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 810
22e3a02b
LP
811 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
812 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 813
22e3a02b
LP
814 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
815 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
816 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
817 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
818 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
819 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
820 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 821
22e3a02b 822 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 823 }
22e3a02b 824
968f3196 825 assert(n <= m);
d025f1e4
ZJS
826
827 if (tv) {
398a50cd 828 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 829 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
830 }
831
832 /* Note that strictly speaking storing the boot id here is
833 * redundant since the entry includes this in-line
834 * anyway. However, we need this indexed, too. */
0c24bb23 835 if (!isempty(s->boot_id_field))
e6a7ec4b 836 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 837
0c24bb23 838 if (!isempty(s->machine_id_field))
e6a7ec4b 839 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 840
0c24bb23 841 if (!isempty(s->hostname_field))
e6a7ec4b 842 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
843
844 assert(n <= m);
845
22e3a02b
LP
846 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
847 /* Split up strictly by (non-root) UID */
848 journal_uid = c->uid;
849 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
850 /* Split up by login UIDs. We do this only if the
851 * realuid is not root, in order not to accidentally
852 * leak privileged information to the user that is
853 * logged by a privileged process that is part of an
7517e174 854 * unprivileged session. */
22e3a02b 855 journal_uid = c->owner_uid;
da499392
KS
856 else
857 journal_uid = 0;
759c945a 858
d07f7b9e 859 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
860}
861
2b044526 862void server_driver_message(Server *s, const char *message_id, const char *format, ...) {
22e3a02b 863
8a03c9ef
ZJS
864 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
865 unsigned n = 0, m;
d025f1e4 866 va_list ap;
22e3a02b 867 int r;
d025f1e4
ZJS
868
869 assert(s);
870 assert(format);
871
4850d39a 872 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
873 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
874 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 875
e6a7ec4b 876 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 877 assert_cc(6 == LOG_INFO);
e6a7ec4b 878 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 879
2b044526 880 if (message_id)
e6a7ec4b 881 iovec[n++] = IOVEC_MAKE_STRING(message_id);
8a03c9ef
ZJS
882 m = n;
883
884 va_start(ap, format);
32917e33
ZJS
885 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
886 /* Error handling below */
8a03c9ef
ZJS
887 va_end(ap);
888
32917e33 889 if (r >= 0)
22e3a02b 890 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
891
892 while (m < n)
893 free(iovec[m++].iov_base);
32917e33
ZJS
894
895 if (r < 0) {
896 /* We failed to format the message. Emit a warning instead. */
897 char buf[LINE_MAX];
898
899 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
900
901 n = 3;
e6a7ec4b
LP
902 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
903 iovec[n++] = IOVEC_MAKE_STRING(buf);
22e3a02b 904 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
32917e33 905 }
d025f1e4
ZJS
906}
907
908void server_dispatch_message(
909 Server *s,
910 struct iovec *iovec, unsigned n, unsigned m,
22e3a02b 911 ClientContext *c,
3b3154df 912 const struct timeval *tv,
968f3196
ZJS
913 int priority,
914 pid_t object_pid) {
d025f1e4 915
8580d1f7 916 uint64_t available = 0;
22e3a02b 917 int rl;
d025f1e4
ZJS
918
919 assert(s);
920 assert(iovec || n == 0);
921
922 if (n == 0)
923 return;
924
925 if (LOG_PRI(priority) > s->max_level_store)
926 return;
927
2f5df74a
HHPF
928 /* Stop early in case the information will not be stored
929 * in a journal. */
930 if (s->storage == STORAGE_NONE)
931 return;
932
22e3a02b
LP
933 if (c && c->unit) {
934 (void) determine_space(s, &available, NULL);
d025f1e4 935
22e3a02b
LP
936 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
937 if (rl == 0)
938 return;
d025f1e4 939
22e3a02b
LP
940 /* Write a suppression message if we suppressed something */
941 if (rl > 1)
942 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
943 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, c->unit),
944 NULL);
d025f1e4
ZJS
945 }
946
22e3a02b 947 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
948}
949
f78273c8 950int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
951 sd_id128_t machine;
952 sd_journal *j = NULL;
fbb63411
LP
953 char ts[FORMAT_TIMESPAN_MAX];
954 usec_t start;
955 unsigned n = 0;
956 int r;
d025f1e4
ZJS
957
958 assert(s);
959
f78273c8 960 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
961 return 0;
962
963 if (!s->runtime_journal)
964 return 0;
965
f78273c8
LP
966 if (require_flag_file && !flushed_flag_is_set())
967 return 0;
968
8580d1f7 969 (void) system_journal_open(s, true);
d025f1e4
ZJS
970
971 if (!s->system_journal)
972 return 0;
973
974 log_debug("Flushing to /var...");
975
fbb63411
LP
976 start = now(CLOCK_MONOTONIC);
977
d025f1e4 978 r = sd_id128_get_machine(&machine);
00a16861 979 if (r < 0)
d025f1e4 980 return r;
d025f1e4
ZJS
981
982 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
983 if (r < 0)
984 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 985
93b73b06
LP
986 sd_journal_set_data_threshold(j, 0);
987
d025f1e4
ZJS
988 SD_JOURNAL_FOREACH(j) {
989 Object *o = NULL;
990 JournalFile *f;
991
992 f = j->current_file;
993 assert(f && f->current_offset > 0);
994
fbb63411
LP
995 n++;
996
d025f1e4
ZJS
997 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
998 if (r < 0) {
da927ba9 999 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1000 goto finish;
1001 }
1002
1003 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1004 if (r >= 0)
1005 continue;
1006
1007 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1008 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1009 goto finish;
1010 }
1011
1012 server_rotate(s);
3a19f215 1013 server_vacuum(s, false);
d025f1e4 1014
253f59df
LP
1015 if (!s->system_journal) {
1016 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1017 r = -EIO;
1018 goto finish;
1019 }
1020
d025f1e4
ZJS
1021 log_debug("Retrying write.");
1022 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1023 if (r < 0) {
da927ba9 1024 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1025 goto finish;
1026 }
1027 }
1028
804ae586
LP
1029 r = 0;
1030
d025f1e4
ZJS
1031finish:
1032 journal_file_post_change(s->system_journal);
1033
804ae586 1034 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1035
1036 if (r >= 0)
c6878637 1037 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1038
763c7aa2 1039 sd_journal_close(j);
d025f1e4 1040
2b044526 1041 server_driver_message(s, NULL,
8a03c9ef
ZJS
1042 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1043 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1044 n),
1045 NULL);
fbb63411 1046
d025f1e4
ZJS
1047 return r;
1048}
1049
8531ae70 1050int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1051 Server *s = userdata;
a315ac4e
LP
1052 struct ucred *ucred = NULL;
1053 struct timeval *tv = NULL;
1054 struct cmsghdr *cmsg;
1055 char *label = NULL;
1056 size_t label_len = 0, m;
1057 struct iovec iovec;
1058 ssize_t n;
1059 int *fds = NULL, v = 0;
1060 unsigned n_fds = 0;
1061
1062 union {
1063 struct cmsghdr cmsghdr;
1064
1065 /* We use NAME_MAX space for the SELinux label
1066 * here. The kernel currently enforces no
1067 * limit, but according to suggestions from
1068 * the SELinux people this will change and it
1069 * will probably be identical to NAME_MAX. For
1070 * now we use that, but this should be updated
1071 * one day when the final limit is known. */
1072 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1073 CMSG_SPACE(sizeof(struct timeval)) +
1074 CMSG_SPACE(sizeof(int)) + /* fd */
1075 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1076 } control = {};
1077
1078 union sockaddr_union sa = {};
1079
1080 struct msghdr msghdr = {
1081 .msg_iov = &iovec,
1082 .msg_iovlen = 1,
1083 .msg_control = &control,
1084 .msg_controllen = sizeof(control),
1085 .msg_name = &sa,
1086 .msg_namelen = sizeof(sa),
1087 };
f9a810be 1088
d025f1e4 1089 assert(s);
875c2e22 1090 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1091
1092 if (revents != EPOLLIN) {
1093 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1094 return -EIO;
1095 }
1096
22e3a02b
LP
1097 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1098 * it.) */
a315ac4e 1099 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1100
a315ac4e
LP
1101 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1102 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1103 (size_t) LINE_MAX,
1104 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1105
a315ac4e
LP
1106 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1107 return log_oom();
875c2e22 1108
a315ac4e
LP
1109 iovec.iov_base = s->buffer;
1110 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1111
a315ac4e
LP
1112 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1113 if (n < 0) {
3742095b 1114 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1115 return 0;
875c2e22 1116
a315ac4e
LP
1117 return log_error_errno(errno, "recvmsg() failed: %m");
1118 }
875c2e22 1119
a315ac4e
LP
1120 CMSG_FOREACH(cmsg, &msghdr) {
1121
1122 if (cmsg->cmsg_level == SOL_SOCKET &&
1123 cmsg->cmsg_type == SCM_CREDENTIALS &&
1124 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1125 ucred = (struct ucred*) CMSG_DATA(cmsg);
1126 else if (cmsg->cmsg_level == SOL_SOCKET &&
1127 cmsg->cmsg_type == SCM_SECURITY) {
1128 label = (char*) CMSG_DATA(cmsg);
1129 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1130 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1131 cmsg->cmsg_type == SO_TIMESTAMP &&
1132 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1133 tv = (struct timeval*) CMSG_DATA(cmsg);
1134 else if (cmsg->cmsg_level == SOL_SOCKET &&
1135 cmsg->cmsg_type == SCM_RIGHTS) {
1136 fds = (int*) CMSG_DATA(cmsg);
1137 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1138 }
a315ac4e 1139 }
d025f1e4 1140
a315ac4e
LP
1141 /* And a trailing NUL, just in case */
1142 s->buffer[n] = 0;
1143
1144 if (fd == s->syslog_fd) {
1145 if (n > 0 && n_fds == 0)
1146 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1147 else if (n_fds > 0)
1148 log_warning("Got file descriptors via syslog socket. Ignoring.");
1149
1150 } else if (fd == s->native_fd) {
1151 if (n > 0 && n_fds == 0)
1152 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1153 else if (n == 0 && n_fds == 1)
1154 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1155 else if (n_fds > 0)
1156 log_warning("Got too many file descriptors via native socket. Ignoring.");
1157
1158 } else {
1159 assert(fd == s->audit_fd);
1160
1161 if (n > 0 && n_fds == 0)
1162 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1163 else if (n_fds > 0)
1164 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1165 }
a315ac4e
LP
1166
1167 close_many(fds, n_fds);
1168 return 0;
f9a810be 1169}
d025f1e4 1170
f9a810be
LP
1171static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1172 Server *s = userdata;
33d52ab9 1173 int r;
d025f1e4 1174
f9a810be 1175 assert(s);
d025f1e4 1176
94b65516 1177 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1178
f78273c8 1179 (void) server_flush_to_var(s, false);
f9a810be 1180 server_sync(s);
3a19f215 1181 server_vacuum(s, false);
d025f1e4 1182
33d52ab9
LP
1183 r = touch("/run/systemd/journal/flushed");
1184 if (r < 0)
1185 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1186
18e758bf 1187 server_space_usage_message(s, NULL);
f9a810be
LP
1188 return 0;
1189}
d025f1e4 1190
f9a810be
LP
1191static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1192 Server *s = userdata;
33d52ab9 1193 int r;
d025f1e4 1194
f9a810be 1195 assert(s);
d025f1e4 1196
94b65516 1197 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1198 server_rotate(s);
3a19f215
FB
1199 server_vacuum(s, true);
1200
1201 if (s->system_journal)
1202 patch_min_use(&s->system_storage);
1203 if (s->runtime_journal)
1204 patch_min_use(&s->runtime_storage);
d025f1e4 1205
dbd6e31c 1206 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1207 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1208 if (r < 0)
1209 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1210
f9a810be
LP
1211 return 0;
1212}
d025f1e4 1213
f9a810be
LP
1214static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215 Server *s = userdata;
d025f1e4 1216
f9a810be 1217 assert(s);
d025f1e4 1218
4daf54a8 1219 log_received_signal(LOG_INFO, si);
d025f1e4 1220
6203e07a 1221 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1222 return 0;
1223}
1224
94b65516
LP
1225static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1226 Server *s = userdata;
33d52ab9 1227 int r;
94b65516
LP
1228
1229 assert(s);
1230
1231 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1232
1233 server_sync(s);
1234
1235 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1236 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1237 if (r < 0)
1238 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1239
1240 return 0;
1241}
1242
f9a810be 1243static int setup_signals(Server *s) {
f9a810be 1244 int r;
d025f1e4
ZJS
1245
1246 assert(s);
1247
9bab3b65 1248 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1249
151b9b96 1250 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1251 if (r < 0)
1252 return r;
1253
151b9b96 1254 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1255 if (r < 0)
1256 return r;
d025f1e4 1257
151b9b96 1258 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1259 if (r < 0)
1260 return r;
d025f1e4 1261
b374689c
LP
1262 /* Let's process SIGTERM late, so that we flush all queued
1263 * messages to disk before we exit */
1264 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1265 if (r < 0)
1266 return r;
1267
1268 /* When journald is invoked on the terminal (when debugging),
1269 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1270 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1271 if (r < 0)
1272 return r;
d025f1e4 1273
b374689c
LP
1274 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1275 if (r < 0)
1276 return r;
1277
94b65516
LP
1278 /* SIGRTMIN+1 causes an immediate sync. We process this very
1279 * late, so that everything else queued at this point is
1280 * really written to disk. Clients can watch
1281 * /run/systemd/journal/synced with inotify until its mtime
1282 * changes to see when a sync happened. */
1283 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1284 if (r < 0)
1285 return r;
1286
1287 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1288 if (r < 0)
1289 return r;
1290
d025f1e4
ZJS
1291 return 0;
1292}
1293
5707ecf3
ZJS
1294static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1295 Server *s = data;
74df0fca 1296 int r;
d025f1e4 1297
5707ecf3 1298 assert(s);
d025f1e4 1299
1d84ad94
LP
1300 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1301
5707ecf3 1302 r = value ? parse_boolean(value) : true;
d581d9d9 1303 if (r < 0)
5707ecf3
ZJS
1304 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1305 else
1306 s->forward_to_syslog = r;
1d84ad94
LP
1307
1308 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1309
5707ecf3
ZJS
1310 r = value ? parse_boolean(value) : true;
1311 if (r < 0)
1312 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1313 else
1314 s->forward_to_kmsg = r;
1d84ad94
LP
1315
1316 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1317
5707ecf3
ZJS
1318 r = value ? parse_boolean(value) : true;
1319 if (r < 0)
1320 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1321 else
1322 s->forward_to_console = r;
1d84ad94
LP
1323
1324 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1325
5707ecf3
ZJS
1326 r = value ? parse_boolean(value) : true;
1327 if (r < 0)
1328 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1329 else
1330 s->forward_to_wall = r;
1d84ad94
LP
1331
1332 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1333
1334 if (proc_cmdline_value_missing(key, value))
1335 return 0;
1336
5707ecf3
ZJS
1337 r = log_level_from_string(value);
1338 if (r < 0)
1339 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1340 else
1341 s->max_level_console = r;
1d84ad94
LP
1342
1343 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1344
1345 if (proc_cmdline_value_missing(key, value))
1346 return 0;
1347
5707ecf3
ZJS
1348 r = log_level_from_string(value);
1349 if (r < 0)
1350 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1351 else
1352 s->max_level_store = r;
1d84ad94
LP
1353
1354 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1355
1356 if (proc_cmdline_value_missing(key, value))
1357 return 0;
1358
5707ecf3
ZJS
1359 r = log_level_from_string(value);
1360 if (r < 0)
1361 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1362 else
1363 s->max_level_syslog = r;
1d84ad94
LP
1364
1365 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1366
1367 if (proc_cmdline_value_missing(key, value))
1368 return 0;
1369
5707ecf3
ZJS
1370 r = log_level_from_string(value);
1371 if (r < 0)
1372 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1373 else
1374 s->max_level_kmsg = r;
1d84ad94
LP
1375
1376 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1377
1378 if (proc_cmdline_value_missing(key, value))
1379 return 0;
1380
5707ecf3
ZJS
1381 r = log_level_from_string(value);
1382 if (r < 0)
1383 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1384 else
1385 s->max_level_wall = r;
1d84ad94 1386
5707ecf3
ZJS
1387 } else if (startswith(key, "systemd.journald"))
1388 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1389
804ae586 1390 /* do not warn about state here, since probably systemd already did */
db91ea32 1391 return 0;
d025f1e4
ZJS
1392}
1393
1394static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1395 assert(s);
1396
43688c49 1397 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1398 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1399 "Journal\0",
1400 config_item_perf_lookup, journald_gperf_lookup,
1401 false, s);
d025f1e4
ZJS
1402}
1403
f9a810be
LP
1404static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1405 Server *s = userdata;
26687bf8
OS
1406
1407 assert(s);
1408
f9a810be 1409 server_sync(s);
26687bf8
OS
1410 return 0;
1411}
1412
d07f7b9e 1413int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1414 int r;
1415
26687bf8
OS
1416 assert(s);
1417
d07f7b9e
LP
1418 if (priority <= LOG_CRIT) {
1419 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1420 server_sync(s);
1421 return 0;
1422 }
1423
26687bf8
OS
1424 if (s->sync_scheduled)
1425 return 0;
1426
f9a810be
LP
1427 if (s->sync_interval_usec > 0) {
1428 usec_t when;
ca267016 1429
6a0f1f6d 1430 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1431 if (r < 0)
1432 return r;
26687bf8 1433
f9a810be
LP
1434 when += s->sync_interval_usec;
1435
1436 if (!s->sync_event_source) {
6a0f1f6d
LP
1437 r = sd_event_add_time(
1438 s->event,
1439 &s->sync_event_source,
1440 CLOCK_MONOTONIC,
1441 when, 0,
1442 server_dispatch_sync, s);
f9a810be
LP
1443 if (r < 0)
1444 return r;
1445
1446 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1447 } else {
1448 r = sd_event_source_set_time(s->sync_event_source, when);
1449 if (r < 0)
1450 return r;
1451
1452 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1453 }
26687bf8 1454 if (r < 0)
f9a810be 1455 return r;
26687bf8 1456
f9a810be
LP
1457 s->sync_scheduled = true;
1458 }
26687bf8
OS
1459
1460 return 0;
1461}
1462
0c24bb23
LP
1463static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1464 Server *s = userdata;
1465
1466 assert(s);
1467
1468 server_cache_hostname(s);
1469 return 0;
1470}
1471
1472static int server_open_hostname(Server *s) {
1473 int r;
1474
1475 assert(s);
1476
1477 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1478 if (s->hostname_fd < 0)
1479 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1480
151b9b96 1481 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1482 if (r < 0) {
28def94c
DR
1483 /* kernels prior to 3.2 don't support polling this file. Ignore
1484 * the failure. */
1485 if (r == -EPERM) {
e53fc357 1486 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1487 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1488 return 0;
1489 }
1490
23bbb0de 1491 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1492 }
1493
1494 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1495 if (r < 0)
1496 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1497
1498 return 0;
1499}
1500
e22aa3d3
LP
1501static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1502 Server *s = userdata;
1503 int r;
1504
1505 assert(s);
1506 assert(s->notify_event_source == es);
1507 assert(s->notify_fd == fd);
1508
e22aa3d3 1509 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1510 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1511 * READY=1 event or an stdout stream event. If there's nothing
1512 * to write anymore, turn our event source off. The next time
1513 * there's something to send it will be turned on again. */
e22aa3d3
LP
1514
1515 if (!s->sent_notify_ready) {
1516 static const char p[] =
1517 "READY=1\n"
1518 "STATUS=Processing requests...";
1519 ssize_t l;
1520
1521 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1522 if (l < 0) {
1523 if (errno == EAGAIN)
1524 return 0;
1525
1526 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1527 }
1528
1529 s->sent_notify_ready = true;
1530 log_debug("Sent READY=1 notification.");
1531
119e9655
LP
1532 } else if (s->send_watchdog) {
1533
1534 static const char p[] =
1535 "WATCHDOG=1";
1536
1537 ssize_t l;
1538
1539 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1540 if (l < 0) {
1541 if (errno == EAGAIN)
1542 return 0;
1543
1544 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1545 }
1546
1547 s->send_watchdog = false;
1548 log_debug("Sent WATCHDOG=1 notification.");
1549
e22aa3d3
LP
1550 } else if (s->stdout_streams_notify_queue)
1551 /* Dispatch one stream notification event */
1552 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1553
61233823 1554 /* Leave us enabled if there's still more to do. */
119e9655 1555 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1556 return 0;
1557
1558 /* There was nothing to do anymore, let's turn ourselves off. */
1559 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1560 if (r < 0)
1561 return log_error_errno(r, "Failed to turn off notify event source: %m");
1562
1563 return 0;
1564}
1565
119e9655
LP
1566static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1567 Server *s = userdata;
1568 int r;
1569
1570 assert(s);
1571
1572 s->send_watchdog = true;
1573
1574 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1575 if (r < 0)
1576 log_warning_errno(r, "Failed to turn on notify event source: %m");
1577
1578 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1579 if (r < 0)
1580 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1581
1582 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1583 if (r < 0)
1584 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1585
1586 return 0;
1587}
1588
e22aa3d3
LP
1589static int server_connect_notify(Server *s) {
1590 union sockaddr_union sa = {
1591 .un.sun_family = AF_UNIX,
1592 };
1593 const char *e;
1594 int r;
1595
1596 assert(s);
1597 assert(s->notify_fd < 0);
1598 assert(!s->notify_event_source);
1599
1600 /*
1601 So here's the problem: we'd like to send notification
1602 messages to PID 1, but we cannot do that via sd_notify(),
1603 since that's synchronous, and we might end up blocking on
1604 it. Specifically: given that PID 1 might block on
1605 dbus-daemon during IPC, and dbus-daemon is logging to us,
1606 and might hence block on us, we might end up in a deadlock
ccddd104 1607 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1608 generating a full blocking circle. To avoid this, let's
1609 create a non-blocking socket, and connect it to the
1610 notification socket, and then wait for POLLOUT before we
1611 send anything. This should efficiently avoid any deadlocks,
1612 as we'll never block on PID 1, hence PID 1 can safely block
1613 on dbus-daemon which can safely block on us again.
1614
1615 Don't think that this issue is real? It is, see:
1616 https://github.com/systemd/systemd/issues/1505
1617 */
1618
1619 e = getenv("NOTIFY_SOCKET");
1620 if (!e)
1621 return 0;
1622
1623 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1624 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1625 return -EINVAL;
1626 }
1627
1628 if (strlen(e) > sizeof(sa.un.sun_path)) {
1629 log_error("NOTIFY_SOCKET path too long: %s", e);
1630 return -EINVAL;
1631 }
1632
1633 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1634 if (s->notify_fd < 0)
1635 return log_error_errno(errno, "Failed to create notify socket: %m");
1636
1637 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1638
1639 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1640 if (sa.un.sun_path[0] == '@')
1641 sa.un.sun_path[0] = 0;
1642
fc2fffe7 1643 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1644 if (r < 0)
1645 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1646
1647 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1648 if (r < 0)
1649 return log_error_errno(r, "Failed to watch notification socket: %m");
1650
119e9655
LP
1651 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1652 s->send_watchdog = true;
1653
4de2402b 1654 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1655 if (r < 0)
1656 return log_error_errno(r, "Failed to add watchdog time event: %m");
1657 }
1658
e22aa3d3
LP
1659 /* This should fire pretty soon, which we'll use to send the
1660 * READY=1 event. */
1661
1662 return 0;
1663}
1664
d025f1e4 1665int server_init(Server *s) {
13790add 1666 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1667 int n, r, fd;
7d18d348 1668 bool no_sockets;
d025f1e4
ZJS
1669
1670 assert(s);
1671
1672 zero(*s);
e22aa3d3 1673 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1674 s->compress = true;
1675 s->seal = true;
b2392ff3 1676 s->read_kmsg = true;
d025f1e4 1677
119e9655
LP
1678 s->watchdog_usec = USEC_INFINITY;
1679
26687bf8
OS
1680 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1681 s->sync_scheduled = false;
1682
d025f1e4
ZJS
1683 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1684 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1685
40b71e89 1686 s->forward_to_wall = true;
d025f1e4 1687
e150e820
MB
1688 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1689
d025f1e4
ZJS
1690 s->max_level_store = LOG_DEBUG;
1691 s->max_level_syslog = LOG_DEBUG;
1692 s->max_level_kmsg = LOG_NOTICE;
1693 s->max_level_console = LOG_INFO;
40b71e89 1694 s->max_level_wall = LOG_EMERG;
d025f1e4 1695
ec20fe5f
LP
1696 s->line_max = DEFAULT_LINE_MAX;
1697
266a4700
FB
1698 journal_reset_metrics(&s->system_storage.metrics);
1699 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1700
1701 server_parse_config_file(s);
1d84ad94
LP
1702
1703 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1704 if (r < 0)
1705 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1706
d288f79f 1707 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1708 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1709 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1710 s->rate_limit_interval = s->rate_limit_burst = 0;
1711 }
d025f1e4 1712
8580d1f7 1713 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1714
43cf8388 1715 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1716 if (!s->user_journals)
1717 return log_oom();
1718
1719 s->mmap = mmap_cache_new();
1720 if (!s->mmap)
1721 return log_oom();
1722
b58c888f
VC
1723 s->deferred_closes = set_new(NULL);
1724 if (!s->deferred_closes)
1725 return log_oom();
1726
f9a810be 1727 r = sd_event_default(&s->event);
23bbb0de
MS
1728 if (r < 0)
1729 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1730
1731 n = sd_listen_fds(true);
23bbb0de
MS
1732 if (n < 0)
1733 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1734
1735 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1736
1737 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1738
1739 if (s->native_fd >= 0) {
1740 log_error("Too many native sockets passed.");
1741 return -EINVAL;
1742 }
1743
1744 s->native_fd = fd;
1745
1746 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1747
1748 if (s->stdout_fd >= 0) {
1749 log_error("Too many stdout sockets passed.");
1750 return -EINVAL;
1751 }
1752
1753 s->stdout_fd = fd;
1754
03ee5c38
LP
1755 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1756 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1757
1758 if (s->syslog_fd >= 0) {
1759 log_error("Too many /dev/log sockets passed.");
1760 return -EINVAL;
1761 }
1762
1763 s->syslog_fd = fd;
1764
875c2e22
LP
1765 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1766
1767 if (s->audit_fd >= 0) {
1768 log_error("Too many audit sockets passed.");
1769 return -EINVAL;
1770 }
1771
1772 s->audit_fd = fd;
1773
4ec3cd73 1774 } else {
4ec3cd73 1775
13790add
LP
1776 if (!fds) {
1777 fds = fdset_new();
1778 if (!fds)
1779 return log_oom();
1780 }
4ec3cd73 1781
13790add
LP
1782 r = fdset_put(fds, fd);
1783 if (r < 0)
1784 return log_oom();
4ec3cd73 1785 }
d025f1e4
ZJS
1786 }
1787
15d91bff
ZJS
1788 /* Try to restore streams, but don't bother if this fails */
1789 (void) server_restore_streams(s, fds);
d025f1e4 1790
13790add
LP
1791 if (fdset_size(fds) > 0) {
1792 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1793 fds = fdset_free(fds);
1794 }
1795
7d18d348
ZJS
1796 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1797
1798 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1799
1800 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1801 r = server_open_stdout_socket(s);
1802 if (r < 0)
1803 return r;
1804
37b7affe 1805 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1806 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1807 if (r < 0)
1808 return r;
1809
37b7affe 1810 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1811 r = server_open_native_socket(s);
d025f1e4
ZJS
1812 if (r < 0)
1813 return r;
1814
b2392ff3 1815 /* /dev/kmsg */
d025f1e4
ZJS
1816 r = server_open_dev_kmsg(s);
1817 if (r < 0)
1818 return r;
1819
7d18d348
ZJS
1820 /* Unless we got *some* sockets and not audit, open audit socket */
1821 if (s->audit_fd >= 0 || no_sockets) {
1822 r = server_open_audit(s);
1823 if (r < 0)
1824 return r;
1825 }
875c2e22 1826
d025f1e4
ZJS
1827 r = server_open_kernel_seqnum(s);
1828 if (r < 0)
1829 return r;
1830
0c24bb23
LP
1831 r = server_open_hostname(s);
1832 if (r < 0)
1833 return r;
1834
f9a810be 1835 r = setup_signals(s);
d025f1e4
ZJS
1836 if (r < 0)
1837 return r;
1838
1839 s->udev = udev_new();
1840 if (!s->udev)
1841 return -ENOMEM;
1842
f9a810be 1843 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1844 if (!s->rate_limit)
1845 return -ENOMEM;
1846
e9174f29
LP
1847 r = cg_get_root_path(&s->cgroup_root);
1848 if (r < 0)
1849 return r;
1850
0c24bb23
LP
1851 server_cache_hostname(s);
1852 server_cache_boot_id(s);
1853 server_cache_machine_id(s);
1854
266a4700
FB
1855 s->runtime_storage.name = "Runtime journal";
1856 s->system_storage.name = "System journal";
1857
605405c6
ZJS
1858 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1859 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1860 if (!s->runtime_storage.path || !s->system_storage.path)
1861 return -ENOMEM;
1862
e22aa3d3
LP
1863 (void) server_connect_notify(s);
1864
22e3a02b
LP
1865 (void) client_context_acquire_default(s);
1866
804ae586 1867 return system_journal_open(s, false);
d025f1e4
ZJS
1868}
1869
1870void server_maybe_append_tags(Server *s) {
349cc4a5 1871#if HAVE_GCRYPT
d025f1e4
ZJS
1872 JournalFile *f;
1873 Iterator i;
1874 usec_t n;
1875
1876 n = now(CLOCK_REALTIME);
1877
1878 if (s->system_journal)
1879 journal_file_maybe_append_tag(s->system_journal, n);
1880
43cf8388 1881 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1882 journal_file_maybe_append_tag(f, n);
1883#endif
1884}
1885
1886void server_done(Server *s) {
1887 JournalFile *f;
1888 assert(s);
1889
b58c888f
VC
1890 if (s->deferred_closes) {
1891 journal_file_close_set(s->deferred_closes);
1892 set_free(s->deferred_closes);
1893 }
1894
d025f1e4
ZJS
1895 while (s->stdout_streams)
1896 stdout_stream_free(s->stdout_streams);
1897
22e3a02b
LP
1898 client_context_flush_all(s);
1899
d025f1e4 1900 if (s->system_journal)
69a3a6fd 1901 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1902
1903 if (s->runtime_journal)
69a3a6fd 1904 (void) journal_file_close(s->runtime_journal);
d025f1e4 1905
43cf8388 1906 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1907 (void) journal_file_close(f);
d025f1e4 1908
43cf8388 1909 ordered_hashmap_free(s->user_journals);
d025f1e4 1910
f9a810be
LP
1911 sd_event_source_unref(s->syslog_event_source);
1912 sd_event_source_unref(s->native_event_source);
1913 sd_event_source_unref(s->stdout_event_source);
1914 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1915 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1916 sd_event_source_unref(s->sync_event_source);
1917 sd_event_source_unref(s->sigusr1_event_source);
1918 sd_event_source_unref(s->sigusr2_event_source);
1919 sd_event_source_unref(s->sigterm_event_source);
1920 sd_event_source_unref(s->sigint_event_source);
94b65516 1921 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1922 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1923 sd_event_source_unref(s->notify_event_source);
119e9655 1924 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1925 sd_event_unref(s->event);
d025f1e4 1926
03e334a1
LP
1927 safe_close(s->syslog_fd);
1928 safe_close(s->native_fd);
1929 safe_close(s->stdout_fd);
1930 safe_close(s->dev_kmsg_fd);
875c2e22 1931 safe_close(s->audit_fd);
03e334a1 1932 safe_close(s->hostname_fd);
e22aa3d3 1933 safe_close(s->notify_fd);
0c24bb23 1934
d025f1e4
ZJS
1935 if (s->rate_limit)
1936 journal_rate_limit_free(s->rate_limit);
1937
1938 if (s->kernel_seqnum)
1939 munmap(s->kernel_seqnum, sizeof(uint64_t));
1940
1941 free(s->buffer);
1942 free(s->tty_path);
e9174f29 1943 free(s->cgroup_root);
99d0966e 1944 free(s->hostname_field);
c6e9e16f
ZJS
1945 free(s->runtime_storage.path);
1946 free(s->system_storage.path);
d025f1e4
ZJS
1947
1948 if (s->mmap)
1949 mmap_cache_unref(s->mmap);
1950
3e044c49 1951 udev_unref(s->udev);
d025f1e4 1952}
8580d1f7
LP
1953
1954static const char* const storage_table[_STORAGE_MAX] = {
1955 [STORAGE_AUTO] = "auto",
1956 [STORAGE_VOLATILE] = "volatile",
1957 [STORAGE_PERSISTENT] = "persistent",
1958 [STORAGE_NONE] = "none"
1959};
1960
1961DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1962DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1963
1964static const char* const split_mode_table[_SPLIT_MAX] = {
1965 [SPLIT_LOGIN] = "login",
1966 [SPLIT_UID] = "uid",
1967 [SPLIT_NONE] = "none",
1968};
1969
1970DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1971DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1972
1973int config_parse_line_max(
1974 const char* unit,
1975 const char *filename,
1976 unsigned line,
1977 const char *section,
1978 unsigned section_line,
1979 const char *lvalue,
1980 int ltype,
1981 const char *rvalue,
1982 void *data,
1983 void *userdata) {
1984
1985 size_t *sz = data;
1986 int r;
1987
1988 assert(filename);
1989 assert(lvalue);
1990 assert(rvalue);
1991 assert(data);
1992
1993 if (isempty(rvalue))
1994 /* Empty assignment means default */
1995 *sz = DEFAULT_LINE_MAX;
1996 else {
1997 uint64_t v;
1998
1999 r = parse_size(rvalue, 1024, &v);
2000 if (r < 0) {
2001 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2002 return 0;
2003 }
2004
2005 if (v < 79) {
2006 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2007 * terminal size is 80ch, and it might make sense to break one character before the natural
2008 * line break would occur on that. */
2009 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2010 *sz = 79;
2011 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2012 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2013 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2014 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2015 * fail much earlier anyway. */
2016 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2017 *sz = SSIZE_MAX-1;
2018 } else
2019 *sz = (size_t) v;
2020 }
2021
2022 return 0;
2023}