]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
Merge pull request #6853 from sourcejedi/GetAll
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
f97b34a6 43#include "format-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
22e3a02b 54#include "journald-context.h"
d025f1e4 55#include "journald-kmsg.h"
d025f1e4 56#include "journald-native.h"
8580d1f7 57#include "journald-rate-limit.h"
3ffd4af2 58#include "journald-server.h"
8580d1f7
LP
59#include "journald-stream.h"
60#include "journald-syslog.h"
4b58153d 61#include "log.h"
07630cea
LP
62#include "missing.h"
63#include "mkdir.h"
6bedfcbb 64#include "parse-util.h"
4e731273 65#include "proc-cmdline.h"
07630cea
LP
66#include "process-util.h"
67#include "rm-rf.h"
68#include "selinux-util.h"
69#include "signal-util.h"
70#include "socket-util.h"
32917e33 71#include "stdio-util.h"
8b43440b 72#include "string-table.h"
07630cea 73#include "string-util.h"
863a5610 74#include "syslog-util.h"
22e3a02b 75#include "user-util.h"
d025f1e4 76
d025f1e4
ZJS
77#define USER_JOURNALS_MAX 1024
78
26687bf8 79#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
80#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
81#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 82#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 83
8580d1f7 84#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 85
e22aa3d3
LP
86#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
87
7a24f3bf
VC
88/* The period to insert between posting changes for coalescing */
89#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
90
e0ed6db9
FB
91static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
92 _cleanup_closedir_ DIR *d = NULL;
93 struct dirent *de;
94 struct statvfs ss;
e0ed6db9
FB
95
96 assert(ret_used);
97 assert(ret_free);
98
266a4700 99 d = opendir(path);
e0ed6db9
FB
100 if (!d)
101 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 102 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
103
104 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 105 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
106
107 *ret_free = ss.f_bsize * ss.f_bavail;
108 *ret_used = 0;
109 FOREACH_DIRENT_ALL(de, d, break) {
110 struct stat st;
111
112 if (!endswith(de->d_name, ".journal") &&
113 !endswith(de->d_name, ".journal~"))
114 continue;
115
116 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 117 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
118 continue;
119 }
120
121 if (!S_ISREG(st.st_mode))
122 continue;
123
124 *ret_used += (uint64_t) st.st_blocks * 512UL;
125 }
126
127 return 0;
128}
129
a0edc477
FB
130static void cache_space_invalidate(JournalStorageSpace *space) {
131 memset(space, 0, sizeof(*space));
132}
133
57f443a6 134static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 135 JournalStorageSpace *space;
266a4700 136 JournalMetrics *metrics;
23aba343 137 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 138 usec_t ts;
e0ed6db9 139 int r;
d025f1e4 140
8580d1f7 141 assert(s);
266a4700 142
266a4700 143 metrics = &storage->metrics;
23aba343 144 space = &storage->space;
d025f1e4 145
8580d1f7 146 ts = now(CLOCK_MONOTONIC);
d025f1e4 147
3099caf2 148 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
149 return 0;
150
23aba343 151 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
152 if (r < 0)
153 return r;
d025f1e4 154
23aba343
FB
155 space->vfs_used = vfs_used;
156 space->vfs_available = vfs_avail;
157
158 avail = LESS_BY(vfs_avail, metrics->keep_free);
159
23aba343
FB
160 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
161 space->available = LESS_BY(space->limit, vfs_used);
162 space->timestamp = ts;
8580d1f7
LP
163 return 1;
164}
165
3a19f215
FB
166static void patch_min_use(JournalStorage *storage) {
167 assert(storage);
168
169 /* Let's bump the min_use limit to the current usage on disk. We do
170 * this when starting up and first opening the journal files. This way
171 * sudden spikes in disk usage will not cause journald to vacuum files
172 * without bounds. Note that this means that only a restart of journald
173 * will make it reset this value. */
174
175 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
176}
177
178
179static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 180 JournalStorage *js;
57f443a6 181 int r;
8580d1f7
LP
182
183 assert(s);
184
266a4700 185 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
186
187 r = cache_space_refresh(s, js);
188 if (r >= 0) {
189 if (available)
190 *available = js->space.available;
191 if (limit)
192 *limit = js->space.limit;
193 }
194 return r;
d025f1e4
ZJS
195}
196
cba5629e
FB
197void server_space_usage_message(Server *s, JournalStorage *storage) {
198 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
199 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
200 JournalMetrics *metrics;
cba5629e
FB
201
202 assert(s);
203
204 if (!storage)
205 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
206
57f443a6 207 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
208 return;
209
210 metrics = &storage->metrics;
23aba343 211 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
212 format_bytes(fb2, sizeof(fb2), metrics->max_use);
213 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 214 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
215 format_bytes(fb5, sizeof(fb5), storage->space.limit);
216 format_bytes(fb6, sizeof(fb6), storage->space.available);
217
2b044526 218 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
219 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
220 storage->name, storage->path, fb1, fb5, fb6),
221 "JOURNAL_NAME=%s", storage->name,
222 "JOURNAL_PATH=%s", storage->path,
23aba343 223 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
224 "CURRENT_USE_PRETTY=%s", fb1,
225 "MAX_USE=%"PRIu64, metrics->max_use,
226 "MAX_USE_PRETTY=%s", fb2,
227 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
228 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 229 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
230 "DISK_AVAILABLE_PRETTY=%s", fb4,
231 "LIMIT=%"PRIu64, storage->space.limit,
232 "LIMIT_PRETTY=%s", fb5,
233 "AVAILABLE=%"PRIu64, storage->space.available,
234 "AVAILABLE_PRETTY=%s", fb6,
235 NULL);
236}
237
5c3bde3f 238static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 239#ifdef HAVE_ACL
5c3bde3f 240 int r;
d025f1e4 241#endif
d025f1e4
ZJS
242 assert(f);
243
d025f1e4 244#ifdef HAVE_ACL
34c10968 245 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
246 return;
247
5c3bde3f
ZJS
248 r = add_acls_for_user(f->fd, uid);
249 if (r < 0)
250 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
251#endif
252}
253
7a24f3bf
VC
254static int open_journal(
255 Server *s,
256 bool reliably,
257 const char *fname,
258 int flags,
259 bool seal,
260 JournalMetrics *metrics,
7a24f3bf
VC
261 JournalFile **ret) {
262 int r;
e167d7fd 263 JournalFile *f;
7a24f3bf
VC
264
265 assert(s);
266 assert(fname);
267 assert(ret);
268
269 if (reliably)
b58c888f 270 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 271 else
5d1ce257 272 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
273 if (r < 0)
274 return r;
275
e167d7fd 276 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 277 if (r < 0) {
69a3a6fd 278 (void) journal_file_close(f);
7a24f3bf
VC
279 return r;
280 }
281
e167d7fd 282 *ret = f;
7a24f3bf
VC
283 return r;
284}
285
/* Returns true if the flag file /run/systemd/journal/flushed exists (as far as access() can tell). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
289
105bdb46
VC
290static int system_journal_open(Server *s, bool flush_requested) {
291 const char *fn;
292 int r = 0;
293
294 if (!s->system_journal &&
f78273c8
LP
295 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
296 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
297
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
300 *
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
303
304 if (s->storage == STORAGE_PERSISTENT)
305 (void) mkdir_p("/var/log/journal/", 0755);
306
266a4700 307 (void) mkdir(s->system_storage.path, 0755);
105bdb46 308
266a4700
FB
309 fn = strjoina(s->system_storage.path, "/system.journal");
310 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
311 if (r >= 0) {
312 server_add_acls(s->system_journal, 0);
57f443a6 313 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 314 patch_min_use(&s->system_storage);
105bdb46
VC
315 } else if (r < 0) {
316 if (r != -ENOENT && r != -EROFS)
317 log_warning_errno(r, "Failed to open system journal: %m");
318
319 r = 0;
320 }
929eeb54
VC
321
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
325 *
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
328 */
f78273c8
LP
329 if (!flush_requested)
330 (void) server_flush_to_var(s, true);
105bdb46
VC
331 }
332
333 if (!s->runtime_journal &&
334 (s->storage != STORAGE_NONE)) {
335
266a4700 336 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
337
338 if (s->system_journal) {
339
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
343
266a4700 344 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
345 if (r < 0) {
346 if (r != -ENOENT)
347 log_warning_errno(r, "Failed to open runtime journal: %m");
348
349 r = 0;
350 }
351
352 } else {
353
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
356
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn, 0750);
360
266a4700 361 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
362 if (r < 0)
363 return log_error_errno(r, "Failed to open runtime journal: %m");
364 }
365
366 if (s->runtime_journal) {
367 server_add_acls(s->runtime_journal, 0);
57f443a6 368 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 369 patch_min_use(&s->runtime_storage);
105bdb46
VC
370 }
371 }
372
373 return r;
374}
375
d025f1e4 376static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 377 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
378 int r;
379 JournalFile *f;
380 sd_id128_t machine;
381
382 assert(s);
383
105bdb46
VC
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
387 *
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
391 *
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s, false);
394
d025f1e4
ZJS
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
399
400 if (s->runtime_journal)
401 return s->runtime_journal;
402
61755fda 403 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
404 return s->system_journal;
405
406 r = sd_id128_get_machine(&machine);
407 if (r < 0)
408 return s->system_journal;
409
4a0b58c4 410 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
411 if (f)
412 return f;
413
de0671ee
ZJS
414 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
415 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
416 return s->system_journal;
417
43cf8388 418 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 419 /* Too many open? Then let's close one */
43cf8388 420 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 421 assert(f);
69a3a6fd 422 (void) journal_file_close(f);
d025f1e4
ZJS
423 }
424
266a4700 425 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
426 if (r < 0)
427 return s->system_journal;
428
5c3bde3f 429 server_add_acls(f, uid);
d025f1e4 430
4a0b58c4 431 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 432 if (r < 0) {
69a3a6fd 433 (void) journal_file_close(f);
d025f1e4
ZJS
434 return s->system_journal;
435 }
436
437 return f;
438}
439
ea69bd41
LP
440static int do_rotate(
441 Server *s,
442 JournalFile **f,
443 const char* name,
444 bool seal,
445 uint32_t uid) {
446
fc55baee
ZJS
447 int r;
448 assert(s);
449
450 if (!*f)
451 return -EINVAL;
452
b58c888f 453 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
454 if (r < 0)
455 if (*f)
ea69bd41 456 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 457 else
ea69bd41 458 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 459 else
5c3bde3f 460 server_add_acls(*f, uid);
2678031a 461
fc55baee
ZJS
462 return r;
463}
464
d025f1e4
ZJS
465void server_rotate(Server *s) {
466 JournalFile *f;
467 void *k;
468 Iterator i;
469 int r;
470
471 log_debug("Rotating...");
472
8580d1f7
LP
473 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
474 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 475
43cf8388 476 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 477 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 478 if (r >= 0)
43cf8388 479 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
480 else if (!f)
481 /* Old file has been closed and deallocated */
43cf8388 482 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 483 }
b58c888f
VC
484
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f, s->deferred_closes, i)
487 if (!journal_file_is_offlining(f)) {
488 (void) set_remove(s->deferred_closes, f);
489 (void) journal_file_close(f);
490 }
d025f1e4
ZJS
491}
492
26687bf8
OS
493void server_sync(Server *s) {
494 JournalFile *f;
26687bf8
OS
495 Iterator i;
496 int r;
497
26687bf8 498 if (s->system_journal) {
ac2e41f5 499 r = journal_file_set_offline(s->system_journal, false);
26687bf8 500 if (r < 0)
65089b82 501 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
502 }
503
65c1d46b 504 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 505 r = journal_file_set_offline(f, false);
26687bf8 506 if (r < 0)
65089b82 507 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
508 }
509
f9a810be
LP
510 if (s->sync_event_source) {
511 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
512 if (r < 0)
da927ba9 513 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 514 }
26687bf8
OS
515
516 s->sync_scheduled = false;
517}
518
3a19f215 519static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 520
63c8666b
ZJS
521 int r;
522
8580d1f7 523 assert(s);
266a4700 524 assert(storage);
8580d1f7 525
57f443a6 526 (void) cache_space_refresh(s, storage);
18e758bf
FB
527
528 if (verbose)
529 server_space_usage_message(s, storage);
8580d1f7 530
57f443a6
FB
531 r = journal_directory_vacuum(storage->path, storage->space.limit,
532 storage->metrics.n_max_files, s->max_retention_usec,
533 &s->oldest_file_usec, verbose);
63c8666b 534 if (r < 0 && r != -ENOENT)
266a4700
FB
535 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
536
a0edc477 537 cache_space_invalidate(&storage->space);
63c8666b
ZJS
538}
539
3a19f215 540int server_vacuum(Server *s, bool verbose) {
8580d1f7 541 assert(s);
d025f1e4
ZJS
542
543 log_debug("Vacuuming...");
544
545 s->oldest_file_usec = 0;
546
266a4700 547 if (s->system_journal)
3a19f215 548 do_vacuum(s, &s->system_storage, verbose);
266a4700 549 if (s->runtime_journal)
3a19f215 550 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 551
8580d1f7 552 return 0;
d025f1e4
ZJS
553}
554
0c24bb23
LP
555static void server_cache_machine_id(Server *s) {
556 sd_id128_t id;
557 int r;
558
559 assert(s);
560
561 r = sd_id128_get_machine(&id);
562 if (r < 0)
563 return;
564
565 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
566}
567
568static void server_cache_boot_id(Server *s) {
569 sd_id128_t id;
570 int r;
571
572 assert(s);
573
574 r = sd_id128_get_boot(&id);
575 if (r < 0)
576 return;
577
578 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
579}
580
581static void server_cache_hostname(Server *s) {
582 _cleanup_free_ char *t = NULL;
583 char *x;
584
585 assert(s);
586
587 t = gethostname_malloc();
588 if (!t)
589 return;
590
591 x = strappend("_HOSTNAME=", t);
592 if (!x)
593 return;
594
595 free(s->hostname_field);
596 s->hostname_field = x;
597}
598
8531ae70 599static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 600 switch(r) {
ae739cc1 601
6e1045e5
ZJS
602 case -E2BIG: /* Hit configured limit */
603 case -EFBIG: /* Hit fs limit */
604 case -EDQUOT: /* Quota limit hit */
605 case -ENOSPC: /* Disk full */
d025f1e4 606 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 607 return true;
ae739cc1 608
6e1045e5
ZJS
609 case -EIO: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f->path);
611 return true;
ae739cc1 612
6e1045e5 613 case -EHOSTDOWN: /* Other machine */
d025f1e4 614 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 615 return true;
ae739cc1 616
6e1045e5 617 case -EBUSY: /* Unclean shutdown */
d025f1e4 618 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 619 return true;
ae739cc1 620
6e1045e5 621 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 622 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 623 return true;
ae739cc1 624
6e1045e5
ZJS
625 case -EBADMSG: /* Corrupted */
626 case -ENODATA: /* Truncated */
627 case -ESHUTDOWN: /* Already archived */
d025f1e4 628 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 629 return true;
ae739cc1 630
6e1045e5 631 case -EIDRM: /* Journal file has been deleted */
2678031a 632 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 633 return true;
ae739cc1
LP
634
635 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 636 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
637 return true;
638
6e1045e5 639 default:
d025f1e4 640 return false;
6e1045e5 641 }
d025f1e4
ZJS
642}
643
d07f7b9e 644static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 645 bool vacuumed = false, rotate = false;
0f972d66 646 struct dual_timestamp ts;
d025f1e4 647 JournalFile *f;
d025f1e4
ZJS
648 int r;
649
650 assert(s);
651 assert(iovec);
652 assert(n > 0);
653
0f972d66
LP
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
658 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
659
7c070017
LP
660 if (ts.realtime < s->last_realtime_clock) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
d025f1e4 665
7c070017
LP
666 log_debug("Time jumped backwards, rotating.");
667 rotate = true;
668 } else {
669
670 f = find_journal(s, uid);
671 if (!f)
672 return;
673
674 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
676 rotate = true;
677 }
678 }
d025f1e4 679
7c070017 680 if (rotate) {
d025f1e4 681 server_rotate(s);
3a19f215 682 server_vacuum(s, false);
d025f1e4
ZJS
683 vacuumed = true;
684
685 f = find_journal(s, uid);
686 if (!f)
687 return;
688 }
689
7c070017
LP
690 s->last_realtime_clock = ts.realtime;
691
0f972d66 692 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 693 if (r >= 0) {
d07f7b9e 694 server_schedule_sync(s, priority);
d025f1e4 695 return;
26687bf8 696 }
d025f1e4
ZJS
697
698 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 699 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
700 return;
701 }
702
703 server_rotate(s);
3a19f215 704 server_vacuum(s, false);
d025f1e4
ZJS
705
706 f = find_journal(s, uid);
707 if (!f)
708 return;
709
710 log_debug("Retrying write.");
0f972d66 711 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
712 if (r < 0)
713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
714 else
d07f7b9e 715 server_schedule_sync(s, priority);
d025f1e4
ZJS
716}
717
22e3a02b
LP
/* Helpers that append one "FIELD=value" entry to iovec[] and advance n, but only if the value is set.
 *
 * The string is allocated on the caller's stack (newa()/strjoina()), so it stays valid until the calling
 * function returns. For the same reason these must not be used inside loops. 'field' (and 'format') must
 * be string literals, as they are concatenated at compile time; 'field' is passed WITHOUT the trailing
 * "=". Each macro expands to a single statement and is meant to be followed by a semicolon. */

/* Numeric value, formatted with 'format'; added only if isset(value) is true. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)    \
        do {                                                                    \
                if (isset(value)) {                                             \
                        char *k;                                                \
                        k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, (value));                  \
                        IOVEC_SET_STRING((iovec)[(n)++], k);                    \
                }                                                               \
        } while (false)

/* NUL-terminated string value; added only if it is neither NULL nor empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                          \
        do {                                                                    \
                if (!isempty(value)) {                                          \
                        char *k;                                                \
                        k = strjoina(field "=", (value));                       \
                        IOVEC_SET_STRING((iovec)[(n)++], k);                    \
                }                                                               \
        } while (false)

/* 128-bit ID value; added only if it is not the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                           \
        do {                                                                    \
                if (!sd_id128_is_null(value)) {                                 \
                        char *k;                                                \
                        k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string((value), stpcpy(k, field "="));      \
                        IOVEC_SET_STRING((iovec)[(n)++], k);                    \
                }                                                               \
        } while (false)

/* Value given as pointer plus length (not necessarily NUL-terminated); added only if the length is > 0. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)               \
        do {                                                                    \
                if ((value_size) > 0) {                                         \
                        char *k;                                                \
                        k = newa(char, strlen(field "=") + (value_size) + 1);   \
                        *((char*) mempcpy(stpcpy(k, field "="), (value), (value_size))) = 0; \
                        IOVEC_SET_STRING((iovec)[(n)++], k);                    \
                }                                                               \
        } while (false)

d025f1e4
ZJS
749static void dispatch_message_real(
750 Server *s,
751 struct iovec *iovec, unsigned n, unsigned m,
22e3a02b 752 const ClientContext *c,
3b3154df 753 const struct timeval *tv,
d07f7b9e 754 int priority,
22e3a02b
LP
755 pid_t object_pid) {
756
757 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
758 uid_t journal_uid;
759 ClientContext *o;
d025f1e4
ZJS
760
761 assert(s);
762 assert(iovec);
763 assert(n > 0);
22e3a02b 764 assert(n + N_IOVEC_META_FIELDS + (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
19cace37 765
22e3a02b
LP
766 if (c) {
767 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
768 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
769 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 770
22e3a02b
LP
771 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
772 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
773 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
774 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 775
22e3a02b 776 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 777
22e3a02b
LP
778 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
779 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 780
22e3a02b
LP
781 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
782 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
784 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
785 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
786 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
787 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 788
22e3a02b 789 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d025f1e4 790 }
968f3196 791
22e3a02b 792 assert(n <= m);
968f3196 793
22e3a02b 794 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 795
22e3a02b
LP
796 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
797 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
798 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 799
22e3a02b
LP
800 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
801 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
802 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
803 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 804
22e3a02b 805 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 806
22e3a02b
LP
807 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
808 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 809
22e3a02b
LP
810 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
811 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
812 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
813 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
814 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
815 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
816 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 817
22e3a02b 818 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 819 }
22e3a02b 820
968f3196 821 assert(n <= m);
d025f1e4
ZJS
822
823 if (tv) {
398a50cd 824 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 825 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
826 }
827
828 /* Note that strictly speaking storing the boot id here is
829 * redundant since the entry includes this in-line
830 * anyway. However, we need this indexed, too. */
0c24bb23
LP
831 if (!isempty(s->boot_id_field))
832 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 833
0c24bb23
LP
834 if (!isempty(s->machine_id_field))
835 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 836
0c24bb23
LP
837 if (!isempty(s->hostname_field))
838 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
839
840 assert(n <= m);
841
22e3a02b
LP
842 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
843 /* Split up strictly by (non-root) UID */
844 journal_uid = c->uid;
845 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
846 /* Split up by login UIDs. We do this only if the
847 * realuid is not root, in order not to accidentally
848 * leak privileged information to the user that is
849 * logged by a privileged process that is part of an
7517e174 850 * unprivileged session. */
22e3a02b 851 journal_uid = c->owner_uid;
da499392
KS
852 else
853 journal_uid = 0;
759c945a 854
d07f7b9e 855 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
856}
857
2b044526 858void server_driver_message(Server *s, const char *message_id, const char *format, ...) {
22e3a02b 859
8a03c9ef
ZJS
860 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
861 unsigned n = 0, m;
d025f1e4 862 va_list ap;
22e3a02b 863 int r;
d025f1e4
ZJS
864
865 assert(s);
866 assert(format);
867
4850d39a 868 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
869 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
870 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
871
d025f1e4 872 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 873 assert_cc(6 == LOG_INFO);
32917e33 874 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 875
2b044526
ZJS
876 if (message_id)
877 IOVEC_SET_STRING(iovec[n++], message_id);
8a03c9ef
ZJS
878 m = n;
879
880 va_start(ap, format);
32917e33
ZJS
881 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
882 /* Error handling below */
8a03c9ef
ZJS
883 va_end(ap);
884
32917e33 885 if (r >= 0)
22e3a02b 886 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
887
888 while (m < n)
889 free(iovec[m++].iov_base);
32917e33
ZJS
890
891 if (r < 0) {
892 /* We failed to format the message. Emit a warning instead. */
893 char buf[LINE_MAX];
894
895 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
896
897 n = 3;
898 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
899 IOVEC_SET_STRING(iovec[n++], buf);
22e3a02b 900 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
32917e33 901 }
d025f1e4
ZJS
902}
903
904void server_dispatch_message(
905 Server *s,
906 struct iovec *iovec, unsigned n, unsigned m,
22e3a02b 907 ClientContext *c,
3b3154df 908 const struct timeval *tv,
968f3196
ZJS
909 int priority,
910 pid_t object_pid) {
d025f1e4 911
8580d1f7 912 uint64_t available = 0;
22e3a02b 913 int rl;
d025f1e4
ZJS
914
915 assert(s);
916 assert(iovec || n == 0);
917
918 if (n == 0)
919 return;
920
921 if (LOG_PRI(priority) > s->max_level_store)
922 return;
923
2f5df74a
HHPF
924 /* Stop early in case the information will not be stored
925 * in a journal. */
926 if (s->storage == STORAGE_NONE)
927 return;
928
22e3a02b
LP
929 if (c && c->unit) {
930 (void) determine_space(s, &available, NULL);
d025f1e4 931
22e3a02b
LP
932 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
933 if (rl == 0)
934 return;
d025f1e4 935
22e3a02b
LP
936 /* Write a suppression message if we suppressed something */
937 if (rl > 1)
938 server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
939 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, c->unit),
940 NULL);
d025f1e4
ZJS
941 }
942
22e3a02b 943 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
944}
945
f78273c8 946int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
947 sd_id128_t machine;
948 sd_journal *j = NULL;
fbb63411
LP
949 char ts[FORMAT_TIMESPAN_MAX];
950 usec_t start;
951 unsigned n = 0;
952 int r;
d025f1e4
ZJS
953
954 assert(s);
955
f78273c8 956 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
957 return 0;
958
959 if (!s->runtime_journal)
960 return 0;
961
f78273c8
LP
962 if (require_flag_file && !flushed_flag_is_set())
963 return 0;
964
8580d1f7 965 (void) system_journal_open(s, true);
d025f1e4
ZJS
966
967 if (!s->system_journal)
968 return 0;
969
970 log_debug("Flushing to /var...");
971
fbb63411
LP
972 start = now(CLOCK_MONOTONIC);
973
d025f1e4 974 r = sd_id128_get_machine(&machine);
00a16861 975 if (r < 0)
d025f1e4 976 return r;
d025f1e4
ZJS
977
978 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
979 if (r < 0)
980 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 981
93b73b06
LP
982 sd_journal_set_data_threshold(j, 0);
983
d025f1e4
ZJS
984 SD_JOURNAL_FOREACH(j) {
985 Object *o = NULL;
986 JournalFile *f;
987
988 f = j->current_file;
989 assert(f && f->current_offset > 0);
990
fbb63411
LP
991 n++;
992
d025f1e4
ZJS
993 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
994 if (r < 0) {
da927ba9 995 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
996 goto finish;
997 }
998
999 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1000 if (r >= 0)
1001 continue;
1002
1003 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1004 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1005 goto finish;
1006 }
1007
1008 server_rotate(s);
3a19f215 1009 server_vacuum(s, false);
d025f1e4 1010
253f59df
LP
1011 if (!s->system_journal) {
1012 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1013 r = -EIO;
1014 goto finish;
1015 }
1016
d025f1e4
ZJS
1017 log_debug("Retrying write.");
1018 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1019 if (r < 0) {
da927ba9 1020 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1021 goto finish;
1022 }
1023 }
1024
804ae586
LP
1025 r = 0;
1026
d025f1e4
ZJS
1027finish:
1028 journal_file_post_change(s->system_journal);
1029
804ae586 1030 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1031
1032 if (r >= 0)
c6878637 1033 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1034
763c7aa2 1035 sd_journal_close(j);
d025f1e4 1036
2b044526 1037 server_driver_message(s, NULL,
8a03c9ef
ZJS
1038 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1039 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1040 n),
1041 NULL);
fbb63411 1042
d025f1e4
ZJS
1043 return r;
1044}
1045
8531ae70 1046int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1047 Server *s = userdata;
a315ac4e
LP
1048 struct ucred *ucred = NULL;
1049 struct timeval *tv = NULL;
1050 struct cmsghdr *cmsg;
1051 char *label = NULL;
1052 size_t label_len = 0, m;
1053 struct iovec iovec;
1054 ssize_t n;
1055 int *fds = NULL, v = 0;
1056 unsigned n_fds = 0;
1057
1058 union {
1059 struct cmsghdr cmsghdr;
1060
1061 /* We use NAME_MAX space for the SELinux label
1062 * here. The kernel currently enforces no
1063 * limit, but according to suggestions from
1064 * the SELinux people this will change and it
1065 * will probably be identical to NAME_MAX. For
1066 * now we use that, but this should be updated
1067 * one day when the final limit is known. */
1068 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1069 CMSG_SPACE(sizeof(struct timeval)) +
1070 CMSG_SPACE(sizeof(int)) + /* fd */
1071 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1072 } control = {};
1073
1074 union sockaddr_union sa = {};
1075
1076 struct msghdr msghdr = {
1077 .msg_iov = &iovec,
1078 .msg_iovlen = 1,
1079 .msg_control = &control,
1080 .msg_controllen = sizeof(control),
1081 .msg_name = &sa,
1082 .msg_namelen = sizeof(sa),
1083 };
f9a810be 1084
d025f1e4 1085 assert(s);
875c2e22 1086 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1087
1088 if (revents != EPOLLIN) {
1089 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1090 return -EIO;
1091 }
1092
22e3a02b
LP
1093 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1094 * it.) */
a315ac4e 1095 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1096
a315ac4e
LP
1097 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1098 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1099 (size_t) LINE_MAX,
1100 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1101
a315ac4e
LP
1102 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1103 return log_oom();
875c2e22 1104
a315ac4e
LP
1105 iovec.iov_base = s->buffer;
1106 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1107
a315ac4e
LP
1108 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1109 if (n < 0) {
1110 if (errno == EINTR || errno == EAGAIN)
1111 return 0;
875c2e22 1112
a315ac4e
LP
1113 return log_error_errno(errno, "recvmsg() failed: %m");
1114 }
875c2e22 1115
a315ac4e
LP
1116 CMSG_FOREACH(cmsg, &msghdr) {
1117
1118 if (cmsg->cmsg_level == SOL_SOCKET &&
1119 cmsg->cmsg_type == SCM_CREDENTIALS &&
1120 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1121 ucred = (struct ucred*) CMSG_DATA(cmsg);
1122 else if (cmsg->cmsg_level == SOL_SOCKET &&
1123 cmsg->cmsg_type == SCM_SECURITY) {
1124 label = (char*) CMSG_DATA(cmsg);
1125 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1126 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1127 cmsg->cmsg_type == SO_TIMESTAMP &&
1128 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1129 tv = (struct timeval*) CMSG_DATA(cmsg);
1130 else if (cmsg->cmsg_level == SOL_SOCKET &&
1131 cmsg->cmsg_type == SCM_RIGHTS) {
1132 fds = (int*) CMSG_DATA(cmsg);
1133 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1134 }
a315ac4e 1135 }
d025f1e4 1136
a315ac4e
LP
1137 /* And a trailing NUL, just in case */
1138 s->buffer[n] = 0;
1139
1140 if (fd == s->syslog_fd) {
1141 if (n > 0 && n_fds == 0)
1142 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1143 else if (n_fds > 0)
1144 log_warning("Got file descriptors via syslog socket. Ignoring.");
1145
1146 } else if (fd == s->native_fd) {
1147 if (n > 0 && n_fds == 0)
1148 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1149 else if (n == 0 && n_fds == 1)
1150 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1151 else if (n_fds > 0)
1152 log_warning("Got too many file descriptors via native socket. Ignoring.");
1153
1154 } else {
1155 assert(fd == s->audit_fd);
1156
1157 if (n > 0 && n_fds == 0)
1158 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1159 else if (n_fds > 0)
1160 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1161 }
a315ac4e
LP
1162
1163 close_many(fds, n_fds);
1164 return 0;
f9a810be 1165}
d025f1e4 1166
f9a810be
LP
1167static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1168 Server *s = userdata;
33d52ab9 1169 int r;
d025f1e4 1170
f9a810be 1171 assert(s);
d025f1e4 1172
94b65516 1173 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1174
f78273c8 1175 (void) server_flush_to_var(s, false);
f9a810be 1176 server_sync(s);
3a19f215 1177 server_vacuum(s, false);
d025f1e4 1178
33d52ab9
LP
1179 r = touch("/run/systemd/journal/flushed");
1180 if (r < 0)
1181 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1182
18e758bf 1183 server_space_usage_message(s, NULL);
f9a810be
LP
1184 return 0;
1185}
d025f1e4 1186
f9a810be
LP
1187static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1188 Server *s = userdata;
33d52ab9 1189 int r;
d025f1e4 1190
f9a810be 1191 assert(s);
d025f1e4 1192
94b65516 1193 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1194 server_rotate(s);
3a19f215
FB
1195 server_vacuum(s, true);
1196
1197 if (s->system_journal)
1198 patch_min_use(&s->system_storage);
1199 if (s->runtime_journal)
1200 patch_min_use(&s->runtime_storage);
d025f1e4 1201
dbd6e31c 1202 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1203 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1204 if (r < 0)
1205 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1206
f9a810be
LP
1207 return 0;
1208}
d025f1e4 1209
f9a810be
LP
1210static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1211 Server *s = userdata;
d025f1e4 1212
f9a810be 1213 assert(s);
d025f1e4 1214
4daf54a8 1215 log_received_signal(LOG_INFO, si);
d025f1e4 1216
6203e07a 1217 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1218 return 0;
1219}
1220
94b65516
LP
1221static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1222 Server *s = userdata;
33d52ab9 1223 int r;
94b65516
LP
1224
1225 assert(s);
1226
1227 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1228
1229 server_sync(s);
1230
1231 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1232 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1233 if (r < 0)
1234 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1235
1236 return 0;
1237}
1238
f9a810be 1239static int setup_signals(Server *s) {
f9a810be 1240 int r;
d025f1e4
ZJS
1241
1242 assert(s);
1243
9bab3b65 1244 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1245
151b9b96 1246 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1247 if (r < 0)
1248 return r;
1249
151b9b96 1250 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1251 if (r < 0)
1252 return r;
d025f1e4 1253
151b9b96 1254 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1255 if (r < 0)
1256 return r;
d025f1e4 1257
b374689c
LP
1258 /* Let's process SIGTERM late, so that we flush all queued
1259 * messages to disk before we exit */
1260 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1261 if (r < 0)
1262 return r;
1263
1264 /* When journald is invoked on the terminal (when debugging),
1265 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1266 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1267 if (r < 0)
1268 return r;
d025f1e4 1269
b374689c
LP
1270 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1271 if (r < 0)
1272 return r;
1273
94b65516
LP
1274 /* SIGRTMIN+1 causes an immediate sync. We process this very
1275 * late, so that everything else queued at this point is
1276 * really written to disk. Clients can watch
1277 * /run/systemd/journal/synced with inotify until its mtime
1278 * changes to see when a sync happened. */
1279 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1280 if (r < 0)
1281 return r;
1282
1283 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1284 if (r < 0)
1285 return r;
1286
d025f1e4
ZJS
1287 return 0;
1288}
1289
5707ecf3
ZJS
1290static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1291 Server *s = data;
74df0fca 1292 int r;
d025f1e4 1293
5707ecf3 1294 assert(s);
d025f1e4 1295
1d84ad94
LP
1296 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1297
5707ecf3 1298 r = value ? parse_boolean(value) : true;
d581d9d9 1299 if (r < 0)
5707ecf3
ZJS
1300 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1301 else
1302 s->forward_to_syslog = r;
1d84ad94
LP
1303
1304 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1305
5707ecf3
ZJS
1306 r = value ? parse_boolean(value) : true;
1307 if (r < 0)
1308 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1309 else
1310 s->forward_to_kmsg = r;
1d84ad94
LP
1311
1312 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1313
5707ecf3
ZJS
1314 r = value ? parse_boolean(value) : true;
1315 if (r < 0)
1316 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1317 else
1318 s->forward_to_console = r;
1d84ad94
LP
1319
1320 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1321
5707ecf3
ZJS
1322 r = value ? parse_boolean(value) : true;
1323 if (r < 0)
1324 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1325 else
1326 s->forward_to_wall = r;
1d84ad94
LP
1327
1328 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1329
1330 if (proc_cmdline_value_missing(key, value))
1331 return 0;
1332
5707ecf3
ZJS
1333 r = log_level_from_string(value);
1334 if (r < 0)
1335 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1336 else
1337 s->max_level_console = r;
1d84ad94
LP
1338
1339 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1340
1341 if (proc_cmdline_value_missing(key, value))
1342 return 0;
1343
5707ecf3
ZJS
1344 r = log_level_from_string(value);
1345 if (r < 0)
1346 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1347 else
1348 s->max_level_store = r;
1d84ad94
LP
1349
1350 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1351
1352 if (proc_cmdline_value_missing(key, value))
1353 return 0;
1354
5707ecf3
ZJS
1355 r = log_level_from_string(value);
1356 if (r < 0)
1357 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1358 else
1359 s->max_level_syslog = r;
1d84ad94
LP
1360
1361 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1362
1363 if (proc_cmdline_value_missing(key, value))
1364 return 0;
1365
5707ecf3
ZJS
1366 r = log_level_from_string(value);
1367 if (r < 0)
1368 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1369 else
1370 s->max_level_kmsg = r;
1d84ad94
LP
1371
1372 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1373
1374 if (proc_cmdline_value_missing(key, value))
1375 return 0;
1376
5707ecf3
ZJS
1377 r = log_level_from_string(value);
1378 if (r < 0)
1379 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1380 else
1381 s->max_level_wall = r;
1d84ad94 1382
5707ecf3
ZJS
1383 } else if (startswith(key, "systemd.journald"))
1384 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1385
804ae586 1386 /* do not warn about state here, since probably systemd already did */
db91ea32 1387 return 0;
d025f1e4
ZJS
1388}
1389
1390static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1391 assert(s);
1392
43688c49 1393 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1394 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1395 "Journal\0",
1396 config_item_perf_lookup, journald_gperf_lookup,
1397 false, s);
d025f1e4
ZJS
1398}
1399
f9a810be
LP
1400static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1401 Server *s = userdata;
26687bf8
OS
1402
1403 assert(s);
1404
f9a810be 1405 server_sync(s);
26687bf8
OS
1406 return 0;
1407}
1408
d07f7b9e 1409int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1410 int r;
1411
26687bf8
OS
1412 assert(s);
1413
d07f7b9e
LP
1414 if (priority <= LOG_CRIT) {
1415 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1416 server_sync(s);
1417 return 0;
1418 }
1419
26687bf8
OS
1420 if (s->sync_scheduled)
1421 return 0;
1422
f9a810be
LP
1423 if (s->sync_interval_usec > 0) {
1424 usec_t when;
ca267016 1425
6a0f1f6d 1426 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1427 if (r < 0)
1428 return r;
26687bf8 1429
f9a810be
LP
1430 when += s->sync_interval_usec;
1431
1432 if (!s->sync_event_source) {
6a0f1f6d
LP
1433 r = sd_event_add_time(
1434 s->event,
1435 &s->sync_event_source,
1436 CLOCK_MONOTONIC,
1437 when, 0,
1438 server_dispatch_sync, s);
f9a810be
LP
1439 if (r < 0)
1440 return r;
1441
1442 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1443 } else {
1444 r = sd_event_source_set_time(s->sync_event_source, when);
1445 if (r < 0)
1446 return r;
1447
1448 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1449 }
26687bf8 1450 if (r < 0)
f9a810be 1451 return r;
26687bf8 1452
f9a810be
LP
1453 s->sync_scheduled = true;
1454 }
26687bf8
OS
1455
1456 return 0;
1457}
1458
0c24bb23
LP
1459static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1460 Server *s = userdata;
1461
1462 assert(s);
1463
1464 server_cache_hostname(s);
1465 return 0;
1466}
1467
1468static int server_open_hostname(Server *s) {
1469 int r;
1470
1471 assert(s);
1472
1473 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1474 if (s->hostname_fd < 0)
1475 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1476
151b9b96 1477 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1478 if (r < 0) {
28def94c
DR
1479 /* kernels prior to 3.2 don't support polling this file. Ignore
1480 * the failure. */
1481 if (r == -EPERM) {
e53fc357 1482 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1483 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1484 return 0;
1485 }
1486
23bbb0de 1487 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1488 }
1489
1490 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1491 if (r < 0)
1492 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1493
1494 return 0;
1495}
1496
e22aa3d3
LP
1497static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1498 Server *s = userdata;
1499 int r;
1500
1501 assert(s);
1502 assert(s->notify_event_source == es);
1503 assert(s->notify_fd == fd);
1504
e22aa3d3 1505 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1506 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1507 * READY=1 event or an stdout stream event. If there's nothing
1508 * to write anymore, turn our event source off. The next time
1509 * there's something to send it will be turned on again. */
e22aa3d3
LP
1510
1511 if (!s->sent_notify_ready) {
1512 static const char p[] =
1513 "READY=1\n"
1514 "STATUS=Processing requests...";
1515 ssize_t l;
1516
1517 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1518 if (l < 0) {
1519 if (errno == EAGAIN)
1520 return 0;
1521
1522 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1523 }
1524
1525 s->sent_notify_ready = true;
1526 log_debug("Sent READY=1 notification.");
1527
119e9655
LP
1528 } else if (s->send_watchdog) {
1529
1530 static const char p[] =
1531 "WATCHDOG=1";
1532
1533 ssize_t l;
1534
1535 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1536 if (l < 0) {
1537 if (errno == EAGAIN)
1538 return 0;
1539
1540 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1541 }
1542
1543 s->send_watchdog = false;
1544 log_debug("Sent WATCHDOG=1 notification.");
1545
e22aa3d3
LP
1546 } else if (s->stdout_streams_notify_queue)
1547 /* Dispatch one stream notification event */
1548 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1549
61233823 1550 /* Leave us enabled if there's still more to do. */
119e9655 1551 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1552 return 0;
1553
1554 /* There was nothing to do anymore, let's turn ourselves off. */
1555 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1556 if (r < 0)
1557 return log_error_errno(r, "Failed to turn off notify event source: %m");
1558
1559 return 0;
1560}
1561
119e9655
LP
1562static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1563 Server *s = userdata;
1564 int r;
1565
1566 assert(s);
1567
1568 s->send_watchdog = true;
1569
1570 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1571 if (r < 0)
1572 log_warning_errno(r, "Failed to turn on notify event source: %m");
1573
1574 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1575 if (r < 0)
1576 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1577
1578 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1579 if (r < 0)
1580 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1581
1582 return 0;
1583}
1584
e22aa3d3
LP
1585static int server_connect_notify(Server *s) {
1586 union sockaddr_union sa = {
1587 .un.sun_family = AF_UNIX,
1588 };
1589 const char *e;
1590 int r;
1591
1592 assert(s);
1593 assert(s->notify_fd < 0);
1594 assert(!s->notify_event_source);
1595
1596 /*
1597 So here's the problem: we'd like to send notification
1598 messages to PID 1, but we cannot do that via sd_notify(),
1599 since that's synchronous, and we might end up blocking on
1600 it. Specifically: given that PID 1 might block on
1601 dbus-daemon during IPC, and dbus-daemon is logging to us,
1602 and might hence block on us, we might end up in a deadlock
ccddd104 1603 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1604 generating a full blocking circle. To avoid this, let's
1605 create a non-blocking socket, and connect it to the
1606 notification socket, and then wait for POLLOUT before we
1607 send anything. This should efficiently avoid any deadlocks,
1608 as we'll never block on PID 1, hence PID 1 can safely block
1609 on dbus-daemon which can safely block on us again.
1610
1611 Don't think that this issue is real? It is, see:
1612 https://github.com/systemd/systemd/issues/1505
1613 */
1614
1615 e = getenv("NOTIFY_SOCKET");
1616 if (!e)
1617 return 0;
1618
1619 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1620 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1621 return -EINVAL;
1622 }
1623
1624 if (strlen(e) > sizeof(sa.un.sun_path)) {
1625 log_error("NOTIFY_SOCKET path too long: %s", e);
1626 return -EINVAL;
1627 }
1628
1629 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1630 if (s->notify_fd < 0)
1631 return log_error_errno(errno, "Failed to create notify socket: %m");
1632
1633 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1634
1635 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1636 if (sa.un.sun_path[0] == '@')
1637 sa.un.sun_path[0] = 0;
1638
fc2fffe7 1639 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1640 if (r < 0)
1641 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1642
1643 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1644 if (r < 0)
1645 return log_error_errno(r, "Failed to watch notification socket: %m");
1646
119e9655
LP
1647 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1648 s->send_watchdog = true;
1649
4de2402b 1650 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1651 if (r < 0)
1652 return log_error_errno(r, "Failed to add watchdog time event: %m");
1653 }
1654
e22aa3d3
LP
1655 /* This should fire pretty soon, which we'll use to send the
1656 * READY=1 event. */
1657
1658 return 0;
1659}
1660
d025f1e4 1661int server_init(Server *s) {
13790add 1662 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1663 int n, r, fd;
7d18d348 1664 bool no_sockets;
d025f1e4
ZJS
1665
1666 assert(s);
1667
1668 zero(*s);
e22aa3d3 1669 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1670 s->compress = true;
1671 s->seal = true;
b2392ff3 1672 s->read_kmsg = true;
d025f1e4 1673
119e9655
LP
1674 s->watchdog_usec = USEC_INFINITY;
1675
26687bf8
OS
1676 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1677 s->sync_scheduled = false;
1678
d025f1e4
ZJS
1679 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1680 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1681
40b71e89 1682 s->forward_to_wall = true;
d025f1e4 1683
e150e820
MB
1684 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1685
d025f1e4
ZJS
1686 s->max_level_store = LOG_DEBUG;
1687 s->max_level_syslog = LOG_DEBUG;
1688 s->max_level_kmsg = LOG_NOTICE;
1689 s->max_level_console = LOG_INFO;
40b71e89 1690 s->max_level_wall = LOG_EMERG;
d025f1e4 1691
266a4700
FB
1692 journal_reset_metrics(&s->system_storage.metrics);
1693 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1694
1695 server_parse_config_file(s);
1d84ad94
LP
1696
1697 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1698 if (r < 0)
1699 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1700
d288f79f 1701 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1702 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1703 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1704 s->rate_limit_interval = s->rate_limit_burst = 0;
1705 }
d025f1e4 1706
8580d1f7 1707 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1708
43cf8388 1709 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1710 if (!s->user_journals)
1711 return log_oom();
1712
1713 s->mmap = mmap_cache_new();
1714 if (!s->mmap)
1715 return log_oom();
1716
b58c888f
VC
1717 s->deferred_closes = set_new(NULL);
1718 if (!s->deferred_closes)
1719 return log_oom();
1720
f9a810be 1721 r = sd_event_default(&s->event);
23bbb0de
MS
1722 if (r < 0)
1723 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1724
1725 n = sd_listen_fds(true);
23bbb0de
MS
1726 if (n < 0)
1727 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1728
1729 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1730
1731 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1732
1733 if (s->native_fd >= 0) {
1734 log_error("Too many native sockets passed.");
1735 return -EINVAL;
1736 }
1737
1738 s->native_fd = fd;
1739
1740 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1741
1742 if (s->stdout_fd >= 0) {
1743 log_error("Too many stdout sockets passed.");
1744 return -EINVAL;
1745 }
1746
1747 s->stdout_fd = fd;
1748
03ee5c38
LP
1749 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1750 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1751
1752 if (s->syslog_fd >= 0) {
1753 log_error("Too many /dev/log sockets passed.");
1754 return -EINVAL;
1755 }
1756
1757 s->syslog_fd = fd;
1758
875c2e22
LP
1759 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1760
1761 if (s->audit_fd >= 0) {
1762 log_error("Too many audit sockets passed.");
1763 return -EINVAL;
1764 }
1765
1766 s->audit_fd = fd;
1767
4ec3cd73 1768 } else {
4ec3cd73 1769
13790add
LP
1770 if (!fds) {
1771 fds = fdset_new();
1772 if (!fds)
1773 return log_oom();
1774 }
4ec3cd73 1775
13790add
LP
1776 r = fdset_put(fds, fd);
1777 if (r < 0)
1778 return log_oom();
4ec3cd73 1779 }
d025f1e4
ZJS
1780 }
1781
15d91bff
ZJS
1782 /* Try to restore streams, but don't bother if this fails */
1783 (void) server_restore_streams(s, fds);
d025f1e4 1784
13790add
LP
1785 if (fdset_size(fds) > 0) {
1786 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1787 fds = fdset_free(fds);
1788 }
1789
7d18d348
ZJS
1790 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1791
1792 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1793
1794 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1795 r = server_open_stdout_socket(s);
1796 if (r < 0)
1797 return r;
1798
37b7affe 1799 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1800 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1801 if (r < 0)
1802 return r;
1803
37b7affe 1804 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1805 r = server_open_native_socket(s);
d025f1e4
ZJS
1806 if (r < 0)
1807 return r;
1808
b2392ff3 1809 /* /dev/kmsg */
d025f1e4
ZJS
1810 r = server_open_dev_kmsg(s);
1811 if (r < 0)
1812 return r;
1813
7d18d348
ZJS
1814 /* Unless we got *some* sockets and not audit, open audit socket */
1815 if (s->audit_fd >= 0 || no_sockets) {
1816 r = server_open_audit(s);
1817 if (r < 0)
1818 return r;
1819 }
875c2e22 1820
d025f1e4
ZJS
1821 r = server_open_kernel_seqnum(s);
1822 if (r < 0)
1823 return r;
1824
0c24bb23
LP
1825 r = server_open_hostname(s);
1826 if (r < 0)
1827 return r;
1828
f9a810be 1829 r = setup_signals(s);
d025f1e4
ZJS
1830 if (r < 0)
1831 return r;
1832
1833 s->udev = udev_new();
1834 if (!s->udev)
1835 return -ENOMEM;
1836
f9a810be 1837 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1838 if (!s->rate_limit)
1839 return -ENOMEM;
1840
e9174f29
LP
1841 r = cg_get_root_path(&s->cgroup_root);
1842 if (r < 0)
1843 return r;
1844
0c24bb23
LP
1845 server_cache_hostname(s);
1846 server_cache_boot_id(s);
1847 server_cache_machine_id(s);
1848
266a4700
FB
1849 s->runtime_storage.name = "Runtime journal";
1850 s->system_storage.name = "System journal";
1851
605405c6
ZJS
1852 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1853 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1854 if (!s->runtime_storage.path || !s->system_storage.path)
1855 return -ENOMEM;
1856
e22aa3d3
LP
1857 (void) server_connect_notify(s);
1858
22e3a02b
LP
1859 (void) client_context_acquire_default(s);
1860
804ae586 1861 return system_journal_open(s, false);
d025f1e4
ZJS
1862}
1863
1864void server_maybe_append_tags(Server *s) {
1865#ifdef HAVE_GCRYPT
1866 JournalFile *f;
1867 Iterator i;
1868 usec_t n;
1869
1870 n = now(CLOCK_REALTIME);
1871
1872 if (s->system_journal)
1873 journal_file_maybe_append_tag(s->system_journal, n);
1874
43cf8388 1875 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1876 journal_file_maybe_append_tag(f, n);
1877#endif
1878}
1879
1880void server_done(Server *s) {
1881 JournalFile *f;
1882 assert(s);
1883
b58c888f
VC
1884 if (s->deferred_closes) {
1885 journal_file_close_set(s->deferred_closes);
1886 set_free(s->deferred_closes);
1887 }
1888
d025f1e4
ZJS
1889 while (s->stdout_streams)
1890 stdout_stream_free(s->stdout_streams);
1891
22e3a02b
LP
1892 client_context_flush_all(s);
1893
d025f1e4 1894 if (s->system_journal)
69a3a6fd 1895 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1896
1897 if (s->runtime_journal)
69a3a6fd 1898 (void) journal_file_close(s->runtime_journal);
d025f1e4 1899
43cf8388 1900 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1901 (void) journal_file_close(f);
d025f1e4 1902
43cf8388 1903 ordered_hashmap_free(s->user_journals);
d025f1e4 1904
f9a810be
LP
1905 sd_event_source_unref(s->syslog_event_source);
1906 sd_event_source_unref(s->native_event_source);
1907 sd_event_source_unref(s->stdout_event_source);
1908 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1909 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1910 sd_event_source_unref(s->sync_event_source);
1911 sd_event_source_unref(s->sigusr1_event_source);
1912 sd_event_source_unref(s->sigusr2_event_source);
1913 sd_event_source_unref(s->sigterm_event_source);
1914 sd_event_source_unref(s->sigint_event_source);
94b65516 1915 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1916 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1917 sd_event_source_unref(s->notify_event_source);
119e9655 1918 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1919 sd_event_unref(s->event);
d025f1e4 1920
03e334a1
LP
1921 safe_close(s->syslog_fd);
1922 safe_close(s->native_fd);
1923 safe_close(s->stdout_fd);
1924 safe_close(s->dev_kmsg_fd);
875c2e22 1925 safe_close(s->audit_fd);
03e334a1 1926 safe_close(s->hostname_fd);
e22aa3d3 1927 safe_close(s->notify_fd);
0c24bb23 1928
d025f1e4
ZJS
1929 if (s->rate_limit)
1930 journal_rate_limit_free(s->rate_limit);
1931
1932 if (s->kernel_seqnum)
1933 munmap(s->kernel_seqnum, sizeof(uint64_t));
1934
1935 free(s->buffer);
1936 free(s->tty_path);
e9174f29 1937 free(s->cgroup_root);
99d0966e 1938 free(s->hostname_field);
c6e9e16f
ZJS
1939 free(s->runtime_storage.path);
1940 free(s->system_storage.path);
d025f1e4
ZJS
1941
1942 if (s->mmap)
1943 mmap_cache_unref(s->mmap);
1944
3e044c49 1945 udev_unref(s->udev);
d025f1e4 1946}
8580d1f7
LP
1947
1948static const char* const storage_table[_STORAGE_MAX] = {
1949 [STORAGE_AUTO] = "auto",
1950 [STORAGE_VOLATILE] = "volatile",
1951 [STORAGE_PERSISTENT] = "persistent",
1952 [STORAGE_NONE] = "none"
1953};
1954
1955DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1956DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1957
1958static const char* const split_mode_table[_SPLIT_MAX] = {
1959 [SPLIT_LOGIN] = "login",
1960 [SPLIT_UID] = "uid",
1961 [SPLIT_NONE] = "none",
1962};
1963
1964DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1965DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");