]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
d025f1e4
ZJS
2/***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
349cc4a5 21#if HAVE_SELINUX
24882e06
LP
22#include <selinux/selinux.h>
23#endif
8580d1f7
LP
24#include <sys/ioctl.h>
25#include <sys/mman.h>
26#include <sys/signalfd.h>
27#include <sys/statvfs.h>
07630cea 28#include <linux/sockios.h>
24882e06 29
b4bbcaa9 30#include "libudev.h"
8580d1f7 31#include "sd-daemon.h"
74df0fca
LP
32#include "sd-journal.h"
33#include "sd-messages.h"
8580d1f7
LP
34
35#include "acl-util.h"
b5efdb8a 36#include "alloc-util.h"
430f0182 37#include "audit-util.h"
d025f1e4 38#include "cgroup-util.h"
d025f1e4 39#include "conf-parser.h"
a0956174 40#include "dirent-util.h"
0dec689b 41#include "extract-word.h"
3ffd4af2 42#include "fd-util.h"
33d52ab9 43#include "fileio.h"
f97b34a6 44#include "format-util.h"
f4f15635 45#include "fs-util.h"
8580d1f7 46#include "hashmap.h"
958b66ea 47#include "hostname-util.h"
4b58153d 48#include "id128-util.h"
afc5dbf3 49#include "io-util.h"
8580d1f7
LP
50#include "journal-authenticate.h"
51#include "journal-file.h"
d025f1e4
ZJS
52#include "journal-internal.h"
53#include "journal-vacuum.h"
8580d1f7 54#include "journald-audit.h"
22e3a02b 55#include "journald-context.h"
d025f1e4 56#include "journald-kmsg.h"
d025f1e4 57#include "journald-native.h"
8580d1f7 58#include "journald-rate-limit.h"
3ffd4af2 59#include "journald-server.h"
8580d1f7
LP
60#include "journald-stream.h"
61#include "journald-syslog.h"
4b58153d 62#include "log.h"
07630cea
LP
63#include "missing.h"
64#include "mkdir.h"
6bedfcbb 65#include "parse-util.h"
4e731273 66#include "proc-cmdline.h"
07630cea
LP
67#include "process-util.h"
68#include "rm-rf.h"
69#include "selinux-util.h"
70#include "signal-util.h"
71#include "socket-util.h"
32917e33 72#include "stdio-util.h"
8b43440b 73#include "string-table.h"
07630cea 74#include "string-util.h"
863a5610 75#include "syslog-util.h"
22e3a02b 76#include "user-util.h"
d025f1e4 77
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at the same time: find_journal() steals and
 * closes the first entry of the user journal map while this many are open. */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached disk space measurement is considered fresh by cache_space_refresh() */
#define RECHECK_SPACE_USEC (30 * USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8 * 1024 * 1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250 * USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48 * 1024)
95
e0ed6db9
FB
96static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
97 _cleanup_closedir_ DIR *d = NULL;
98 struct dirent *de;
99 struct statvfs ss;
e0ed6db9
FB
100
101 assert(ret_used);
102 assert(ret_free);
103
266a4700 104 d = opendir(path);
e0ed6db9
FB
105 if (!d)
106 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 107 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
108
109 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 110 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
111
112 *ret_free = ss.f_bsize * ss.f_bavail;
113 *ret_used = 0;
114 FOREACH_DIRENT_ALL(de, d, break) {
115 struct stat st;
116
117 if (!endswith(de->d_name, ".journal") &&
118 !endswith(de->d_name, ".journal~"))
119 continue;
120
121 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 122 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
123 continue;
124 }
125
126 if (!S_ISREG(st.st_mode))
127 continue;
128
129 *ret_used += (uint64_t) st.st_blocks * 512UL;
130 }
131
132 return 0;
133}
134
a0edc477
FB
135static void cache_space_invalidate(JournalStorageSpace *space) {
136 memset(space, 0, sizeof(*space));
137}
138
57f443a6 139static int cache_space_refresh(Server *s, JournalStorage *storage) {
23aba343 140 JournalStorageSpace *space;
266a4700 141 JournalMetrics *metrics;
23aba343 142 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 143 usec_t ts;
e0ed6db9 144 int r;
d025f1e4 145
8580d1f7 146 assert(s);
266a4700 147
266a4700 148 metrics = &storage->metrics;
23aba343 149 space = &storage->space;
d025f1e4 150
8580d1f7 151 ts = now(CLOCK_MONOTONIC);
d025f1e4 152
3099caf2 153 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
154 return 0;
155
23aba343 156 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
157 if (r < 0)
158 return r;
d025f1e4 159
23aba343
FB
160 space->vfs_used = vfs_used;
161 space->vfs_available = vfs_avail;
162
163 avail = LESS_BY(vfs_avail, metrics->keep_free);
164
23aba343
FB
165 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
166 space->available = LESS_BY(space->limit, vfs_used);
167 space->timestamp = ts;
8580d1f7
LP
168 return 1;
169}
170
3a19f215
FB
171static void patch_min_use(JournalStorage *storage) {
172 assert(storage);
173
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
179
180 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
181}
182
183
184static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 185 JournalStorage *js;
57f443a6 186 int r;
8580d1f7
LP
187
188 assert(s);
189
266a4700 190 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
191
192 r = cache_space_refresh(s, js);
193 if (r >= 0) {
194 if (available)
195 *available = js->space.available;
196 if (limit)
197 *limit = js->space.limit;
198 }
199 return r;
d025f1e4
ZJS
200}
201
cba5629e
FB
202void server_space_usage_message(Server *s, JournalStorage *storage) {
203 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
204 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
205 JournalMetrics *metrics;
cba5629e
FB
206
207 assert(s);
208
209 if (!storage)
210 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
211
57f443a6 212 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
213 return;
214
215 metrics = &storage->metrics;
23aba343 216 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
217 format_bytes(fb2, sizeof(fb2), metrics->max_use);
218 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 219 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
220 format_bytes(fb5, sizeof(fb5), storage->space.limit);
221 format_bytes(fb6, sizeof(fb6), storage->space.available);
222
13181942
LP
223 server_driver_message(s, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
cba5629e
FB
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage->name, storage->path, fb1, fb5, fb6),
227 "JOURNAL_NAME=%s", storage->name,
228 "JOURNAL_PATH=%s", storage->path,
23aba343 229 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
230 "CURRENT_USE_PRETTY=%s", fb1,
231 "MAX_USE=%"PRIu64, metrics->max_use,
232 "MAX_USE_PRETTY=%s", fb2,
233 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 235 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
236 "DISK_AVAILABLE_PRETTY=%s", fb4,
237 "LIMIT=%"PRIu64, storage->space.limit,
238 "LIMIT_PRETTY=%s", fb5,
239 "AVAILABLE=%"PRIu64, storage->space.available,
240 "AVAILABLE_PRETTY=%s", fb6,
241 NULL);
242}
243
5c3bde3f 244static void server_add_acls(JournalFile *f, uid_t uid) {
349cc4a5 245#if HAVE_ACL
5c3bde3f 246 int r;
d025f1e4 247#endif
d025f1e4
ZJS
248 assert(f);
249
349cc4a5 250#if HAVE_ACL
34c10968 251 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
252 return;
253
5c3bde3f
ZJS
254 r = add_acls_for_user(f->fd, uid);
255 if (r < 0)
256 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
257#endif
258}
259
7a24f3bf
VC
260static int open_journal(
261 Server *s,
262 bool reliably,
263 const char *fname,
264 int flags,
265 bool seal,
266 JournalMetrics *metrics,
7a24f3bf
VC
267 JournalFile **ret) {
268 int r;
e167d7fd 269 JournalFile *f;
7a24f3bf
VC
270
271 assert(s);
272 assert(fname);
273 assert(ret);
274
275 if (reliably)
b58c888f 276 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 277 else
5d1ce257 278 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
279 if (r < 0)
280 return r;
281
e167d7fd 282 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 283 if (r < 0) {
69a3a6fd 284 (void) journal_file_close(f);
7a24f3bf
VC
285 return r;
286 }
287
e167d7fd 288 *ret = f;
7a24f3bf
VC
289 return r;
290}
291
/* Returns true if the flag file /run/systemd/journal/flushed is accessible. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
295
105bdb46
VC
296static int system_journal_open(Server *s, bool flush_requested) {
297 const char *fn;
298 int r = 0;
299
300 if (!s->system_journal &&
f78273c8
LP
301 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
302 (flush_requested || flushed_flag_is_set())) {
105bdb46
VC
303
304 /* If in auto mode: first try to create the machine
305 * path, but not the prefix.
306 *
307 * If in persistent mode: create /var/log/journal and
308 * the machine path */
309
310 if (s->storage == STORAGE_PERSISTENT)
311 (void) mkdir_p("/var/log/journal/", 0755);
312
266a4700 313 (void) mkdir(s->system_storage.path, 0755);
105bdb46 314
266a4700
FB
315 fn = strjoina(s->system_storage.path, "/system.journal");
316 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
317 if (r >= 0) {
318 server_add_acls(s->system_journal, 0);
57f443a6 319 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 320 patch_min_use(&s->system_storage);
105bdb46 321 } else if (r < 0) {
4c701096 322 if (!IN_SET(r, -ENOENT, -EROFS))
105bdb46
VC
323 log_warning_errno(r, "Failed to open system journal: %m");
324
325 r = 0;
326 }
929eeb54
VC
327
328 /* If the runtime journal is open, and we're post-flush, we're
329 * recovering from a failed system journal rotate (ENOSPC)
330 * for which the runtime journal was reopened.
331 *
332 * Perform an implicit flush to var, leaving the runtime
333 * journal closed, now that the system journal is back.
334 */
f78273c8
LP
335 if (!flush_requested)
336 (void) server_flush_to_var(s, true);
105bdb46
VC
337 }
338
339 if (!s->runtime_journal &&
340 (s->storage != STORAGE_NONE)) {
341
266a4700 342 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
343
344 if (s->system_journal) {
345
346 /* Try to open the runtime journal, but only
347 * if it already exists, so that we can flush
348 * it into the system journal */
349
266a4700 350 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
351 if (r < 0) {
352 if (r != -ENOENT)
353 log_warning_errno(r, "Failed to open runtime journal: %m");
354
355 r = 0;
356 }
357
358 } else {
359
360 /* OK, we really need the runtime journal, so create
361 * it if necessary. */
362
363 (void) mkdir("/run/log", 0755);
364 (void) mkdir("/run/log/journal", 0755);
365 (void) mkdir_parents(fn, 0750);
366
266a4700 367 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
368 if (r < 0)
369 return log_error_errno(r, "Failed to open runtime journal: %m");
370 }
371
372 if (s->runtime_journal) {
373 server_add_acls(s->runtime_journal, 0);
57f443a6 374 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 375 patch_min_use(&s->runtime_storage);
105bdb46
VC
376 }
377 }
378
379 return r;
380}
381
d025f1e4 382static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 383 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
384 int r;
385 JournalFile *f;
386 sd_id128_t machine;
387
388 assert(s);
389
105bdb46
VC
390 /* A rotate that fails to create the new journal (ENOSPC) leaves the
391 * rotated journal as NULL. Unless we revisit opening, even after
392 * space is made available we'll continue to return NULL indefinitely.
393 *
394 * system_journal_open() is a noop if the journals are already open, so
395 * we can just call it here to recover from failed rotates (or anything
396 * else that's left the journals as NULL).
397 *
398 * Fixes https://github.com/systemd/systemd/issues/3968 */
399 (void) system_journal_open(s, false);
400
d025f1e4
ZJS
401 /* We split up user logs only on /var, not on /run. If the
402 * runtime file is open, we write to it exclusively, in order
403 * to guarantee proper order as soon as we flush /run to
404 * /var and close the runtime file. */
405
406 if (s->runtime_journal)
407 return s->runtime_journal;
408
61755fda 409 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
410 return s->system_journal;
411
412 r = sd_id128_get_machine(&machine);
413 if (r < 0)
414 return s->system_journal;
415
4a0b58c4 416 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
417 if (f)
418 return f;
419
de0671ee
ZJS
420 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
421 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
422 return s->system_journal;
423
43cf8388 424 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 425 /* Too many open? Then let's close one */
43cf8388 426 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 427 assert(f);
69a3a6fd 428 (void) journal_file_close(f);
d025f1e4
ZJS
429 }
430
266a4700 431 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
432 if (r < 0)
433 return s->system_journal;
434
5c3bde3f 435 server_add_acls(f, uid);
d025f1e4 436
4a0b58c4 437 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 438 if (r < 0) {
69a3a6fd 439 (void) journal_file_close(f);
d025f1e4
ZJS
440 return s->system_journal;
441 }
442
443 return f;
444}
445
ea69bd41
LP
446static int do_rotate(
447 Server *s,
448 JournalFile **f,
449 const char* name,
450 bool seal,
451 uint32_t uid) {
452
fc55baee
ZJS
453 int r;
454 assert(s);
455
456 if (!*f)
457 return -EINVAL;
458
b58c888f 459 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
460 if (r < 0)
461 if (*f)
ea69bd41 462 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 463 else
ea69bd41 464 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 465 else
5c3bde3f 466 server_add_acls(*f, uid);
2678031a 467
fc55baee
ZJS
468 return r;
469}
470
d025f1e4
ZJS
471void server_rotate(Server *s) {
472 JournalFile *f;
473 void *k;
474 Iterator i;
475 int r;
476
477 log_debug("Rotating...");
478
8580d1f7
LP
479 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
480 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 481
43cf8388 482 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 483 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 484 if (r >= 0)
43cf8388 485 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
486 else if (!f)
487 /* Old file has been closed and deallocated */
43cf8388 488 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 489 }
b58c888f
VC
490
491 /* Perform any deferred closes which aren't still offlining. */
492 SET_FOREACH(f, s->deferred_closes, i)
493 if (!journal_file_is_offlining(f)) {
494 (void) set_remove(s->deferred_closes, f);
495 (void) journal_file_close(f);
496 }
d025f1e4
ZJS
497}
498
26687bf8
OS
499void server_sync(Server *s) {
500 JournalFile *f;
26687bf8
OS
501 Iterator i;
502 int r;
503
26687bf8 504 if (s->system_journal) {
ac2e41f5 505 r = journal_file_set_offline(s->system_journal, false);
26687bf8 506 if (r < 0)
65089b82 507 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
508 }
509
65c1d46b 510 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 511 r = journal_file_set_offline(f, false);
26687bf8 512 if (r < 0)
65089b82 513 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
514 }
515
f9a810be
LP
516 if (s->sync_event_source) {
517 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
518 if (r < 0)
da927ba9 519 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 520 }
26687bf8
OS
521
522 s->sync_scheduled = false;
523}
524
3a19f215 525static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 526
63c8666b
ZJS
527 int r;
528
8580d1f7 529 assert(s);
266a4700 530 assert(storage);
8580d1f7 531
57f443a6 532 (void) cache_space_refresh(s, storage);
18e758bf
FB
533
534 if (verbose)
535 server_space_usage_message(s, storage);
8580d1f7 536
57f443a6
FB
537 r = journal_directory_vacuum(storage->path, storage->space.limit,
538 storage->metrics.n_max_files, s->max_retention_usec,
539 &s->oldest_file_usec, verbose);
63c8666b 540 if (r < 0 && r != -ENOENT)
266a4700
FB
541 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
542
a0edc477 543 cache_space_invalidate(&storage->space);
63c8666b
ZJS
544}
545
3a19f215 546int server_vacuum(Server *s, bool verbose) {
8580d1f7 547 assert(s);
d025f1e4
ZJS
548
549 log_debug("Vacuuming...");
550
551 s->oldest_file_usec = 0;
552
266a4700 553 if (s->system_journal)
3a19f215 554 do_vacuum(s, &s->system_storage, verbose);
266a4700 555 if (s->runtime_journal)
3a19f215 556 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 557
8580d1f7 558 return 0;
d025f1e4
ZJS
559}
560
0c24bb23
LP
561static void server_cache_machine_id(Server *s) {
562 sd_id128_t id;
563 int r;
564
565 assert(s);
566
567 r = sd_id128_get_machine(&id);
568 if (r < 0)
569 return;
570
571 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
572}
573
574static void server_cache_boot_id(Server *s) {
575 sd_id128_t id;
576 int r;
577
578 assert(s);
579
580 r = sd_id128_get_boot(&id);
581 if (r < 0)
582 return;
583
584 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
585}
586
587static void server_cache_hostname(Server *s) {
588 _cleanup_free_ char *t = NULL;
589 char *x;
590
591 assert(s);
592
593 t = gethostname_malloc();
594 if (!t)
595 return;
596
597 x = strappend("_HOSTNAME=", t);
598 if (!x)
599 return;
600
601 free(s->hostname_field);
602 s->hostname_field = x;
603}
604
8531ae70 605static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 606 switch(r) {
ae739cc1 607
6e1045e5
ZJS
608 case -E2BIG: /* Hit configured limit */
609 case -EFBIG: /* Hit fs limit */
610 case -EDQUOT: /* Quota limit hit */
611 case -ENOSPC: /* Disk full */
d025f1e4 612 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 613 return true;
ae739cc1 614
6e1045e5
ZJS
615 case -EIO: /* I/O error of some kind (mmap) */
616 log_warning("%s: IO error, rotating.", f->path);
617 return true;
ae739cc1 618
6e1045e5 619 case -EHOSTDOWN: /* Other machine */
d025f1e4 620 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 621 return true;
ae739cc1 622
6e1045e5 623 case -EBUSY: /* Unclean shutdown */
d025f1e4 624 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 625 return true;
ae739cc1 626
6e1045e5 627 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 628 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 629 return true;
ae739cc1 630
6e1045e5
ZJS
631 case -EBADMSG: /* Corrupted */
632 case -ENODATA: /* Truncated */
633 case -ESHUTDOWN: /* Already archived */
d025f1e4 634 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 635 return true;
ae739cc1 636
6e1045e5 637 case -EIDRM: /* Journal file has been deleted */
2678031a 638 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 639 return true;
ae739cc1
LP
640
641 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 642 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
643 return true;
644
6e1045e5 645 default:
d025f1e4 646 return false;
6e1045e5 647 }
d025f1e4
ZJS
648}
649
d07f7b9e 650static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 651 bool vacuumed = false, rotate = false;
0f972d66 652 struct dual_timestamp ts;
d025f1e4 653 JournalFile *f;
d025f1e4
ZJS
654 int r;
655
656 assert(s);
657 assert(iovec);
658 assert(n > 0);
659
0f972d66
LP
660 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
661 * the source time, and not even the time the event was originally seen, but instead simply the time we started
662 * processing it, as we want strictly linear ordering in what we write out.) */
663 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
664 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
665
7c070017
LP
666 if (ts.realtime < s->last_realtime_clock) {
667 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
668 * regular operation. However, when it does happen, then we should make sure that we start fresh files
669 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
670 * bisection works correctly. */
d025f1e4 671
7c070017
LP
672 log_debug("Time jumped backwards, rotating.");
673 rotate = true;
674 } else {
675
676 f = find_journal(s, uid);
677 if (!f)
678 return;
679
680 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
681 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
682 rotate = true;
683 }
684 }
d025f1e4 685
7c070017 686 if (rotate) {
d025f1e4 687 server_rotate(s);
3a19f215 688 server_vacuum(s, false);
d025f1e4
ZJS
689 vacuumed = true;
690
691 f = find_journal(s, uid);
692 if (!f)
693 return;
694 }
695
7c070017
LP
696 s->last_realtime_clock = ts.realtime;
697
0f972d66 698 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 699 if (r >= 0) {
d07f7b9e 700 server_schedule_sync(s, priority);
d025f1e4 701 return;
26687bf8 702 }
d025f1e4
ZJS
703
704 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 705 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
706 return;
707 }
708
709 server_rotate(s);
3a19f215 710 server_vacuum(s, false);
d025f1e4
ZJS
711
712 f = find_journal(s, uid);
713 if (!f)
714 return;
715
716 log_debug("Retrying write.");
0f972d66 717 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
718 if (r < 0)
719 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
720 else
d07f7b9e 721 server_schedule_sync(s, priority);
d025f1e4
ZJS
722}
723
22e3a02b
LP
/* Helpers that append one "FIELD=value" entry to iovec[] and advance n, provided the value is set. The
 * string is allocated on the stack of the calling function via newa()/strjoina(), so it stays valid until
 * the caller returns; hence these may not be used in loops. 'field' must be a string literal WITHOUT a
 * trailing "=", as the "=" is appended here.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves like a single statement and cannot capture
 * an "else" at the place of use. */

/* Adds a numeric field, formatted with 'format', if isset(value) is true. 'type' is used to size the buffer. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *_k;                                       \
                        _k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(_k, field "=" format, (value));         \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

/* Adds a string field if the string is neither NULL nor empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *_k;                                       \
                        _k = strjoina(field "=", (value));              \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

/* Adds a 128bit ID field if the ID is not the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *_k;                                       \
                        _k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string((value), stpcpy(_k, field "=")); \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

/* Adds a field from a buffer of 'value_size' bytes that is not necessarily NUL terminated, if the size is
 * non-zero. A terminating NUL is appended to the copy. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *_k;                                       \
                        _k = newa(char, strlen(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(_k, field "="), (value), (value_size))) = 0; \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

d025f1e4
ZJS
755static void dispatch_message_real(
756 Server *s,
d3070fbd 757 struct iovec *iovec, size_t n, size_t m,
22e3a02b 758 const ClientContext *c,
3b3154df 759 const struct timeval *tv,
d07f7b9e 760 int priority,
22e3a02b
LP
761 pid_t object_pid) {
762
763 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
764 uid_t journal_uid;
765 ClientContext *o;
d025f1e4
ZJS
766
767 assert(s);
768 assert(iovec);
769 assert(n > 0);
d3070fbd
LP
770 assert(n +
771 N_IOVEC_META_FIELDS +
772 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
773 client_context_extra_fields_n_iovec(c) <= m);
19cace37 774
22e3a02b
LP
775 if (c) {
776 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
777 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
778 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
4b58153d 779
22e3a02b
LP
780 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
781 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
782 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
783 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
d025f1e4 784
22e3a02b 785 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
ae018d9b 786
22e3a02b
LP
787 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
788 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
d025f1e4 789
22e3a02b
LP
790 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
791 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
792 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
793 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
794 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
795 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
796 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
e7ff4e7f 797
22e3a02b 798 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
d3070fbd
LP
799
800 if (c->extra_fields_n_iovec > 0) {
801 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
802 n += c->extra_fields_n_iovec;
803 }
d025f1e4 804 }
968f3196 805
22e3a02b 806 assert(n <= m);
968f3196 807
22e3a02b 808 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
968f3196 809
22e3a02b
LP
810 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
811 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
812 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
968f3196 813
22e3a02b
LP
814 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
815 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
816 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
817 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
968f3196 818
22e3a02b 819 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
19cace37 820
22e3a02b
LP
821 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
822 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
d473176a 823
22e3a02b
LP
824 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
825 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
826 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
827 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
828 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
829 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
830 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
d473176a 831
22e3a02b 832 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
968f3196 833 }
22e3a02b 834
968f3196 835 assert(n <= m);
d025f1e4
ZJS
836
837 if (tv) {
398a50cd 838 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
e6a7ec4b 839 iovec[n++] = IOVEC_MAKE_STRING(source_time);
d025f1e4
ZJS
840 }
841
842 /* Note that strictly speaking storing the boot id here is
843 * redundant since the entry includes this in-line
844 * anyway. However, we need this indexed, too. */
0c24bb23 845 if (!isempty(s->boot_id_field))
e6a7ec4b 846 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
d025f1e4 847
0c24bb23 848 if (!isempty(s->machine_id_field))
e6a7ec4b 849 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
d025f1e4 850
0c24bb23 851 if (!isempty(s->hostname_field))
e6a7ec4b 852 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
d025f1e4
ZJS
853
854 assert(n <= m);
855
22e3a02b
LP
856 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
857 /* Split up strictly by (non-root) UID */
858 journal_uid = c->uid;
859 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
edc3797f
LP
860 /* Split up by login UIDs. We do this only if the
861 * realuid is not root, in order not to accidentally
862 * leak privileged information to the user that is
863 * logged by a privileged process that is part of an
7517e174 864 * unprivileged session. */
22e3a02b 865 journal_uid = c->owner_uid;
da499392
KS
866 else
867 journal_uid = 0;
759c945a 868
d07f7b9e 869 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
870}
871
13181942 872void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
22e3a02b 873
d3070fbd
LP
874 struct iovec *iovec;
875 size_t n = 0, k, m;
d025f1e4 876 va_list ap;
22e3a02b 877 int r;
d025f1e4
ZJS
878
879 assert(s);
880 assert(format);
881
d3070fbd
LP
882 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context);
883 iovec = newa(struct iovec, m);
884
4850d39a 885 assert_cc(3 == LOG_FAC(LOG_DAEMON));
e6a7ec4b
LP
886 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
887 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
b6fa2555 888
e6a7ec4b 889 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
4850d39a 890 assert_cc(6 == LOG_INFO);
e6a7ec4b 891 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
d025f1e4 892
2b044526 893 if (message_id)
e6a7ec4b 894 iovec[n++] = IOVEC_MAKE_STRING(message_id);
d3070fbd 895 k = n;
8a03c9ef
ZJS
896
897 va_start(ap, format);
d3070fbd 898 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
32917e33 899 /* Error handling below */
8a03c9ef
ZJS
900 va_end(ap);
901
32917e33 902 if (r >= 0)
d3070fbd 903 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
8a03c9ef 904
d3070fbd
LP
905 while (k < n)
906 free(iovec[k++].iov_base);
32917e33
ZJS
907
908 if (r < 0) {
909 /* We failed to format the message. Emit a warning instead. */
910 char buf[LINE_MAX];
911
912 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
913
914 n = 3;
e6a7ec4b
LP
915 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
916 iovec[n++] = IOVEC_MAKE_STRING(buf);
d3070fbd 917 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
32917e33 918 }
d025f1e4
ZJS
919}
920
921void server_dispatch_message(
922 Server *s,
d3070fbd 923 struct iovec *iovec, size_t n, size_t m,
22e3a02b 924 ClientContext *c,
3b3154df 925 const struct timeval *tv,
968f3196
ZJS
926 int priority,
927 pid_t object_pid) {
d025f1e4 928
8580d1f7 929 uint64_t available = 0;
22e3a02b 930 int rl;
d025f1e4
ZJS
931
932 assert(s);
933 assert(iovec || n == 0);
934
935 if (n == 0)
936 return;
937
938 if (LOG_PRI(priority) > s->max_level_store)
939 return;
940
2f5df74a
HHPF
941 /* Stop early in case the information will not be stored
942 * in a journal. */
943 if (s->storage == STORAGE_NONE)
944 return;
945
22e3a02b
LP
946 if (c && c->unit) {
947 (void) determine_space(s, &available, NULL);
d025f1e4 948
22e3a02b
LP
949 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
950 if (rl == 0)
951 return;
d025f1e4 952
22e3a02b
LP
953 /* Write a suppression message if we suppressed something */
954 if (rl > 1)
13181942
LP
955 server_driver_message(s, c->pid,
956 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
957 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
958 LOG_MESSAGE("N_DROPPED=%i", rl - 1),
22e3a02b 959 NULL);
d025f1e4
ZJS
960 }
961
22e3a02b 962 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
d025f1e4
ZJS
963}
964
f78273c8 965int server_flush_to_var(Server *s, bool require_flag_file) {
d025f1e4
ZJS
966 sd_id128_t machine;
967 sd_journal *j = NULL;
fbb63411
LP
968 char ts[FORMAT_TIMESPAN_MAX];
969 usec_t start;
970 unsigned n = 0;
971 int r;
d025f1e4
ZJS
972
973 assert(s);
974
f78273c8 975 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
d025f1e4
ZJS
976 return 0;
977
978 if (!s->runtime_journal)
979 return 0;
980
f78273c8
LP
981 if (require_flag_file && !flushed_flag_is_set())
982 return 0;
983
8580d1f7 984 (void) system_journal_open(s, true);
d025f1e4
ZJS
985
986 if (!s->system_journal)
987 return 0;
988
989 log_debug("Flushing to /var...");
990
fbb63411
LP
991 start = now(CLOCK_MONOTONIC);
992
d025f1e4 993 r = sd_id128_get_machine(&machine);
00a16861 994 if (r < 0)
d025f1e4 995 return r;
d025f1e4
ZJS
996
997 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
998 if (r < 0)
999 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1000
93b73b06
LP
1001 sd_journal_set_data_threshold(j, 0);
1002
d025f1e4
ZJS
1003 SD_JOURNAL_FOREACH(j) {
1004 Object *o = NULL;
1005 JournalFile *f;
1006
1007 f = j->current_file;
1008 assert(f && f->current_offset > 0);
1009
fbb63411
LP
1010 n++;
1011
d025f1e4
ZJS
1012 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1013 if (r < 0) {
da927ba9 1014 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1015 goto finish;
1016 }
1017
1018 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1019 if (r >= 0)
1020 continue;
1021
1022 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1023 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1024 goto finish;
1025 }
1026
1027 server_rotate(s);
3a19f215 1028 server_vacuum(s, false);
d025f1e4 1029
253f59df
LP
1030 if (!s->system_journal) {
1031 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1032 r = -EIO;
1033 goto finish;
1034 }
1035
d025f1e4
ZJS
1036 log_debug("Retrying write.");
1037 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1038 if (r < 0) {
da927ba9 1039 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1040 goto finish;
1041 }
1042 }
1043
804ae586
LP
1044 r = 0;
1045
d025f1e4
ZJS
1046finish:
1047 journal_file_post_change(s->system_journal);
1048
804ae586 1049 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1050
1051 if (r >= 0)
c6878637 1052 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1053
763c7aa2 1054 sd_journal_close(j);
d025f1e4 1055
13181942 1056 server_driver_message(s, 0, NULL,
8a03c9ef
ZJS
1057 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1058 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1059 n),
1060 NULL);
fbb63411 1061
d025f1e4
ZJS
1062 return r;
1063}
1064
8531ae70 1065int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1066 Server *s = userdata;
a315ac4e
LP
1067 struct ucred *ucred = NULL;
1068 struct timeval *tv = NULL;
1069 struct cmsghdr *cmsg;
1070 char *label = NULL;
1071 size_t label_len = 0, m;
1072 struct iovec iovec;
1073 ssize_t n;
1074 int *fds = NULL, v = 0;
1075 unsigned n_fds = 0;
1076
1077 union {
1078 struct cmsghdr cmsghdr;
1079
1080 /* We use NAME_MAX space for the SELinux label
1081 * here. The kernel currently enforces no
1082 * limit, but according to suggestions from
1083 * the SELinux people this will change and it
1084 * will probably be identical to NAME_MAX. For
1085 * now we use that, but this should be updated
1086 * one day when the final limit is known. */
1087 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1088 CMSG_SPACE(sizeof(struct timeval)) +
1089 CMSG_SPACE(sizeof(int)) + /* fd */
1090 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1091 } control = {};
1092
1093 union sockaddr_union sa = {};
1094
1095 struct msghdr msghdr = {
1096 .msg_iov = &iovec,
1097 .msg_iovlen = 1,
1098 .msg_control = &control,
1099 .msg_controllen = sizeof(control),
1100 .msg_name = &sa,
1101 .msg_namelen = sizeof(sa),
1102 };
f9a810be 1103
d025f1e4 1104 assert(s);
875c2e22 1105 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1106
1107 if (revents != EPOLLIN) {
1108 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1109 return -EIO;
1110 }
1111
22e3a02b
LP
1112 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1113 * it.) */
a315ac4e 1114 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1115
a315ac4e
LP
1116 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1117 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1118 (size_t) LINE_MAX,
1119 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1120
a315ac4e
LP
1121 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1122 return log_oom();
875c2e22 1123
a315ac4e
LP
1124 iovec.iov_base = s->buffer;
1125 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1126
a315ac4e
LP
1127 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1128 if (n < 0) {
3742095b 1129 if (IN_SET(errno, EINTR, EAGAIN))
a315ac4e 1130 return 0;
875c2e22 1131
a315ac4e
LP
1132 return log_error_errno(errno, "recvmsg() failed: %m");
1133 }
875c2e22 1134
a315ac4e
LP
1135 CMSG_FOREACH(cmsg, &msghdr) {
1136
1137 if (cmsg->cmsg_level == SOL_SOCKET &&
1138 cmsg->cmsg_type == SCM_CREDENTIALS &&
1139 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1140 ucred = (struct ucred*) CMSG_DATA(cmsg);
1141 else if (cmsg->cmsg_level == SOL_SOCKET &&
1142 cmsg->cmsg_type == SCM_SECURITY) {
1143 label = (char*) CMSG_DATA(cmsg);
1144 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1145 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1146 cmsg->cmsg_type == SO_TIMESTAMP &&
1147 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1148 tv = (struct timeval*) CMSG_DATA(cmsg);
1149 else if (cmsg->cmsg_level == SOL_SOCKET &&
1150 cmsg->cmsg_type == SCM_RIGHTS) {
1151 fds = (int*) CMSG_DATA(cmsg);
1152 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1153 }
a315ac4e 1154 }
d025f1e4 1155
a315ac4e
LP
1156 /* And a trailing NUL, just in case */
1157 s->buffer[n] = 0;
1158
1159 if (fd == s->syslog_fd) {
1160 if (n > 0 && n_fds == 0)
1161 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1162 else if (n_fds > 0)
1163 log_warning("Got file descriptors via syslog socket. Ignoring.");
1164
1165 } else if (fd == s->native_fd) {
1166 if (n > 0 && n_fds == 0)
1167 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1168 else if (n == 0 && n_fds == 1)
1169 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1170 else if (n_fds > 0)
1171 log_warning("Got too many file descriptors via native socket. Ignoring.");
1172
1173 } else {
1174 assert(fd == s->audit_fd);
1175
1176 if (n > 0 && n_fds == 0)
1177 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1178 else if (n_fds > 0)
1179 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1180 }
a315ac4e
LP
1181
1182 close_many(fds, n_fds);
1183 return 0;
f9a810be 1184}
d025f1e4 1185
f9a810be
LP
1186static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1187 Server *s = userdata;
33d52ab9 1188 int r;
d025f1e4 1189
f9a810be 1190 assert(s);
d025f1e4 1191
94b65516 1192 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1193
f78273c8 1194 (void) server_flush_to_var(s, false);
f9a810be 1195 server_sync(s);
3a19f215 1196 server_vacuum(s, false);
d025f1e4 1197
33d52ab9
LP
1198 r = touch("/run/systemd/journal/flushed");
1199 if (r < 0)
1200 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1201
18e758bf 1202 server_space_usage_message(s, NULL);
f9a810be
LP
1203 return 0;
1204}
d025f1e4 1205
f9a810be
LP
1206static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1207 Server *s = userdata;
33d52ab9 1208 int r;
d025f1e4 1209
f9a810be 1210 assert(s);
d025f1e4 1211
94b65516 1212 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1213 server_rotate(s);
3a19f215
FB
1214 server_vacuum(s, true);
1215
1216 if (s->system_journal)
1217 patch_min_use(&s->system_storage);
1218 if (s->runtime_journal)
1219 patch_min_use(&s->runtime_storage);
d025f1e4 1220
dbd6e31c 1221 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1222 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1223 if (r < 0)
1224 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1225
f9a810be
LP
1226 return 0;
1227}
d025f1e4 1228
f9a810be
LP
1229static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1230 Server *s = userdata;
d025f1e4 1231
f9a810be 1232 assert(s);
d025f1e4 1233
4daf54a8 1234 log_received_signal(LOG_INFO, si);
d025f1e4 1235
6203e07a 1236 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1237 return 0;
1238}
1239
94b65516
LP
1240static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1241 Server *s = userdata;
33d52ab9 1242 int r;
94b65516
LP
1243
1244 assert(s);
1245
1246 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1247
1248 server_sync(s);
1249
1250 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1251 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1252 if (r < 0)
1253 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1254
1255 return 0;
1256}
1257
f9a810be 1258static int setup_signals(Server *s) {
f9a810be 1259 int r;
d025f1e4
ZJS
1260
1261 assert(s);
1262
9bab3b65 1263 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1264
151b9b96 1265 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1266 if (r < 0)
1267 return r;
1268
151b9b96 1269 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1270 if (r < 0)
1271 return r;
d025f1e4 1272
151b9b96 1273 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1274 if (r < 0)
1275 return r;
d025f1e4 1276
b374689c
LP
1277 /* Let's process SIGTERM late, so that we flush all queued
1278 * messages to disk before we exit */
1279 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1280 if (r < 0)
1281 return r;
1282
1283 /* When journald is invoked on the terminal (when debugging),
1284 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1285 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1286 if (r < 0)
1287 return r;
d025f1e4 1288
b374689c
LP
1289 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1290 if (r < 0)
1291 return r;
1292
94b65516
LP
1293 /* SIGRTMIN+1 causes an immediate sync. We process this very
1294 * late, so that everything else queued at this point is
1295 * really written to disk. Clients can watch
1296 * /run/systemd/journal/synced with inotify until its mtime
1297 * changes to see when a sync happened. */
1298 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1299 if (r < 0)
1300 return r;
1301
1302 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1303 if (r < 0)
1304 return r;
1305
d025f1e4
ZJS
1306 return 0;
1307}
1308
5707ecf3
ZJS
1309static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1310 Server *s = data;
74df0fca 1311 int r;
d025f1e4 1312
5707ecf3 1313 assert(s);
d025f1e4 1314
1d84ad94
LP
1315 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1316
5707ecf3 1317 r = value ? parse_boolean(value) : true;
d581d9d9 1318 if (r < 0)
5707ecf3
ZJS
1319 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1320 else
1321 s->forward_to_syslog = r;
1d84ad94
LP
1322
1323 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1324
5707ecf3
ZJS
1325 r = value ? parse_boolean(value) : true;
1326 if (r < 0)
1327 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1328 else
1329 s->forward_to_kmsg = r;
1d84ad94
LP
1330
1331 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1332
5707ecf3
ZJS
1333 r = value ? parse_boolean(value) : true;
1334 if (r < 0)
1335 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1336 else
1337 s->forward_to_console = r;
1d84ad94
LP
1338
1339 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1340
5707ecf3
ZJS
1341 r = value ? parse_boolean(value) : true;
1342 if (r < 0)
1343 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1344 else
1345 s->forward_to_wall = r;
1d84ad94
LP
1346
1347 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1348
1349 if (proc_cmdline_value_missing(key, value))
1350 return 0;
1351
5707ecf3
ZJS
1352 r = log_level_from_string(value);
1353 if (r < 0)
1354 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1355 else
1356 s->max_level_console = r;
1d84ad94
LP
1357
1358 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1359
1360 if (proc_cmdline_value_missing(key, value))
1361 return 0;
1362
5707ecf3
ZJS
1363 r = log_level_from_string(value);
1364 if (r < 0)
1365 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1366 else
1367 s->max_level_store = r;
1d84ad94
LP
1368
1369 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1370
1371 if (proc_cmdline_value_missing(key, value))
1372 return 0;
1373
5707ecf3
ZJS
1374 r = log_level_from_string(value);
1375 if (r < 0)
1376 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1377 else
1378 s->max_level_syslog = r;
1d84ad94
LP
1379
1380 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1381
1382 if (proc_cmdline_value_missing(key, value))
1383 return 0;
1384
5707ecf3
ZJS
1385 r = log_level_from_string(value);
1386 if (r < 0)
1387 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1388 else
1389 s->max_level_kmsg = r;
1d84ad94
LP
1390
1391 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1392
1393 if (proc_cmdline_value_missing(key, value))
1394 return 0;
1395
5707ecf3
ZJS
1396 r = log_level_from_string(value);
1397 if (r < 0)
1398 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1399 else
1400 s->max_level_wall = r;
1d84ad94 1401
5707ecf3
ZJS
1402 } else if (startswith(key, "systemd.journald"))
1403 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1404
804ae586 1405 /* do not warn about state here, since probably systemd already did */
db91ea32 1406 return 0;
d025f1e4
ZJS
1407}
1408
1409static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1410 assert(s);
1411
43688c49 1412 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
da412854
YW
1413 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1414 "Journal\0",
1415 config_item_perf_lookup, journald_gperf_lookup,
bcde742e 1416 CONFIG_PARSE_WARN, s);
d025f1e4
ZJS
1417}
1418
f9a810be
LP
1419static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1420 Server *s = userdata;
26687bf8
OS
1421
1422 assert(s);
1423
f9a810be 1424 server_sync(s);
26687bf8
OS
1425 return 0;
1426}
1427
d07f7b9e 1428int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1429 int r;
1430
26687bf8
OS
1431 assert(s);
1432
d07f7b9e
LP
1433 if (priority <= LOG_CRIT) {
1434 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1435 server_sync(s);
1436 return 0;
1437 }
1438
26687bf8
OS
1439 if (s->sync_scheduled)
1440 return 0;
1441
f9a810be
LP
1442 if (s->sync_interval_usec > 0) {
1443 usec_t when;
ca267016 1444
6a0f1f6d 1445 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1446 if (r < 0)
1447 return r;
26687bf8 1448
f9a810be
LP
1449 when += s->sync_interval_usec;
1450
1451 if (!s->sync_event_source) {
6a0f1f6d
LP
1452 r = sd_event_add_time(
1453 s->event,
1454 &s->sync_event_source,
1455 CLOCK_MONOTONIC,
1456 when, 0,
1457 server_dispatch_sync, s);
f9a810be
LP
1458 if (r < 0)
1459 return r;
1460
1461 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1462 } else {
1463 r = sd_event_source_set_time(s->sync_event_source, when);
1464 if (r < 0)
1465 return r;
1466
1467 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1468 }
26687bf8 1469 if (r < 0)
f9a810be 1470 return r;
26687bf8 1471
f9a810be
LP
1472 s->sync_scheduled = true;
1473 }
26687bf8
OS
1474
1475 return 0;
1476}
1477
0c24bb23
LP
1478static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1479 Server *s = userdata;
1480
1481 assert(s);
1482
1483 server_cache_hostname(s);
1484 return 0;
1485}
1486
1487static int server_open_hostname(Server *s) {
1488 int r;
1489
1490 assert(s);
1491
1492 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1493 if (s->hostname_fd < 0)
1494 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1495
151b9b96 1496 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1497 if (r < 0) {
28def94c
DR
1498 /* kernels prior to 3.2 don't support polling this file. Ignore
1499 * the failure. */
1500 if (r == -EPERM) {
e53fc357 1501 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1502 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1503 return 0;
1504 }
1505
23bbb0de 1506 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1507 }
1508
1509 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1510 if (r < 0)
1511 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1512
1513 return 0;
1514}
1515
e22aa3d3
LP
1516static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1517 Server *s = userdata;
1518 int r;
1519
1520 assert(s);
1521 assert(s->notify_event_source == es);
1522 assert(s->notify_fd == fd);
1523
e22aa3d3 1524 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1525 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1526 * READY=1 event or an stdout stream event. If there's nothing
1527 * to write anymore, turn our event source off. The next time
1528 * there's something to send it will be turned on again. */
e22aa3d3
LP
1529
1530 if (!s->sent_notify_ready) {
1531 static const char p[] =
1532 "READY=1\n"
1533 "STATUS=Processing requests...";
1534 ssize_t l;
1535
1536 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1537 if (l < 0) {
1538 if (errno == EAGAIN)
1539 return 0;
1540
1541 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1542 }
1543
1544 s->sent_notify_ready = true;
1545 log_debug("Sent READY=1 notification.");
1546
119e9655
LP
1547 } else if (s->send_watchdog) {
1548
1549 static const char p[] =
1550 "WATCHDOG=1";
1551
1552 ssize_t l;
1553
1554 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1555 if (l < 0) {
1556 if (errno == EAGAIN)
1557 return 0;
1558
1559 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1560 }
1561
1562 s->send_watchdog = false;
1563 log_debug("Sent WATCHDOG=1 notification.");
1564
e22aa3d3
LP
1565 } else if (s->stdout_streams_notify_queue)
1566 /* Dispatch one stream notification event */
1567 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1568
61233823 1569 /* Leave us enabled if there's still more to do. */
119e9655 1570 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1571 return 0;
1572
1573 /* There was nothing to do anymore, let's turn ourselves off. */
1574 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1575 if (r < 0)
1576 return log_error_errno(r, "Failed to turn off notify event source: %m");
1577
1578 return 0;
1579}
1580
119e9655
LP
1581static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1582 Server *s = userdata;
1583 int r;
1584
1585 assert(s);
1586
1587 s->send_watchdog = true;
1588
1589 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1590 if (r < 0)
1591 log_warning_errno(r, "Failed to turn on notify event source: %m");
1592
1593 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1594 if (r < 0)
1595 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1596
1597 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1598 if (r < 0)
1599 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1600
1601 return 0;
1602}
1603
e22aa3d3
LP
1604static int server_connect_notify(Server *s) {
1605 union sockaddr_union sa = {
1606 .un.sun_family = AF_UNIX,
1607 };
1608 const char *e;
1609 int r;
1610
1611 assert(s);
1612 assert(s->notify_fd < 0);
1613 assert(!s->notify_event_source);
1614
1615 /*
1616 So here's the problem: we'd like to send notification
1617 messages to PID 1, but we cannot do that via sd_notify(),
1618 since that's synchronous, and we might end up blocking on
1619 it. Specifically: given that PID 1 might block on
1620 dbus-daemon during IPC, and dbus-daemon is logging to us,
1621 and might hence block on us, we might end up in a deadlock
ccddd104 1622 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1623 generating a full blocking circle. To avoid this, let's
1624 create a non-blocking socket, and connect it to the
1625 notification socket, and then wait for POLLOUT before we
1626 send anything. This should efficiently avoid any deadlocks,
1627 as we'll never block on PID 1, hence PID 1 can safely block
1628 on dbus-daemon which can safely block on us again.
1629
1630 Don't think that this issue is real? It is, see:
1631 https://github.com/systemd/systemd/issues/1505
1632 */
1633
1634 e = getenv("NOTIFY_SOCKET");
1635 if (!e)
1636 return 0;
1637
4c701096 1638 if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
e22aa3d3
LP
1639 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1640 return -EINVAL;
1641 }
1642
1643 if (strlen(e) > sizeof(sa.un.sun_path)) {
1644 log_error("NOTIFY_SOCKET path too long: %s", e);
1645 return -EINVAL;
1646 }
1647
1648 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1649 if (s->notify_fd < 0)
1650 return log_error_errno(errno, "Failed to create notify socket: %m");
1651
1652 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1653
1654 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1655 if (sa.un.sun_path[0] == '@')
1656 sa.un.sun_path[0] = 0;
1657
fc2fffe7 1658 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1659 if (r < 0)
1660 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1661
1662 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1663 if (r < 0)
1664 return log_error_errno(r, "Failed to watch notification socket: %m");
1665
119e9655
LP
1666 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1667 s->send_watchdog = true;
1668
4de2402b 1669 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1670 if (r < 0)
1671 return log_error_errno(r, "Failed to add watchdog time event: %m");
1672 }
1673
e22aa3d3
LP
1674 /* This should fire pretty soon, which we'll use to send the
1675 * READY=1 event. */
1676
1677 return 0;
1678}
1679
d025f1e4 1680int server_init(Server *s) {
13790add 1681 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1682 int n, r, fd;
7d18d348 1683 bool no_sockets;
d025f1e4
ZJS
1684
1685 assert(s);
1686
1687 zero(*s);
e22aa3d3 1688 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1689 s->compress = true;
1690 s->seal = true;
b2392ff3 1691 s->read_kmsg = true;
d025f1e4 1692
119e9655
LP
1693 s->watchdog_usec = USEC_INFINITY;
1694
26687bf8
OS
1695 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1696 s->sync_scheduled = false;
1697
d025f1e4
ZJS
1698 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1699 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1700
40b71e89 1701 s->forward_to_wall = true;
d025f1e4 1702
e150e820
MB
1703 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1704
d025f1e4
ZJS
1705 s->max_level_store = LOG_DEBUG;
1706 s->max_level_syslog = LOG_DEBUG;
1707 s->max_level_kmsg = LOG_NOTICE;
1708 s->max_level_console = LOG_INFO;
40b71e89 1709 s->max_level_wall = LOG_EMERG;
d025f1e4 1710
ec20fe5f
LP
1711 s->line_max = DEFAULT_LINE_MAX;
1712
266a4700
FB
1713 journal_reset_metrics(&s->system_storage.metrics);
1714 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1715
1716 server_parse_config_file(s);
1d84ad94
LP
1717
1718 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1719 if (r < 0)
1720 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
8580d1f7 1721
d288f79f 1722 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1723 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1724 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1725 s->rate_limit_interval = s->rate_limit_burst = 0;
1726 }
d025f1e4 1727
8580d1f7 1728 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1729
43cf8388 1730 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1731 if (!s->user_journals)
1732 return log_oom();
1733
1734 s->mmap = mmap_cache_new();
1735 if (!s->mmap)
1736 return log_oom();
1737
b58c888f
VC
1738 s->deferred_closes = set_new(NULL);
1739 if (!s->deferred_closes)
1740 return log_oom();
1741
f9a810be 1742 r = sd_event_default(&s->event);
23bbb0de
MS
1743 if (r < 0)
1744 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1745
1746 n = sd_listen_fds(true);
23bbb0de
MS
1747 if (n < 0)
1748 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1749
1750 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1751
1752 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1753
1754 if (s->native_fd >= 0) {
1755 log_error("Too many native sockets passed.");
1756 return -EINVAL;
1757 }
1758
1759 s->native_fd = fd;
1760
1761 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1762
1763 if (s->stdout_fd >= 0) {
1764 log_error("Too many stdout sockets passed.");
1765 return -EINVAL;
1766 }
1767
1768 s->stdout_fd = fd;
1769
03ee5c38
LP
1770 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1771 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1772
1773 if (s->syslog_fd >= 0) {
1774 log_error("Too many /dev/log sockets passed.");
1775 return -EINVAL;
1776 }
1777
1778 s->syslog_fd = fd;
1779
875c2e22
LP
1780 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1781
1782 if (s->audit_fd >= 0) {
1783 log_error("Too many audit sockets passed.");
1784 return -EINVAL;
1785 }
1786
1787 s->audit_fd = fd;
1788
4ec3cd73 1789 } else {
4ec3cd73 1790
13790add
LP
1791 if (!fds) {
1792 fds = fdset_new();
1793 if (!fds)
1794 return log_oom();
1795 }
4ec3cd73 1796
13790add
LP
1797 r = fdset_put(fds, fd);
1798 if (r < 0)
1799 return log_oom();
4ec3cd73 1800 }
d025f1e4
ZJS
1801 }
1802
15d91bff
ZJS
1803 /* Try to restore streams, but don't bother if this fails */
1804 (void) server_restore_streams(s, fds);
d025f1e4 1805
13790add
LP
1806 if (fdset_size(fds) > 0) {
1807 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1808 fds = fdset_free(fds);
1809 }
1810
7d18d348
ZJS
1811 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1812
1813 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1814
1815 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1816 r = server_open_stdout_socket(s);
1817 if (r < 0)
1818 return r;
1819
37b7affe 1820 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1821 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1822 if (r < 0)
1823 return r;
1824
37b7affe 1825 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1826 r = server_open_native_socket(s);
d025f1e4
ZJS
1827 if (r < 0)
1828 return r;
1829
b2392ff3 1830 /* /dev/kmsg */
d025f1e4
ZJS
1831 r = server_open_dev_kmsg(s);
1832 if (r < 0)
1833 return r;
1834
7d18d348
ZJS
1835 /* Unless we got *some* sockets and not audit, open audit socket */
1836 if (s->audit_fd >= 0 || no_sockets) {
1837 r = server_open_audit(s);
1838 if (r < 0)
1839 return r;
1840 }
875c2e22 1841
d025f1e4
ZJS
1842 r = server_open_kernel_seqnum(s);
1843 if (r < 0)
1844 return r;
1845
0c24bb23
LP
1846 r = server_open_hostname(s);
1847 if (r < 0)
1848 return r;
1849
f9a810be 1850 r = setup_signals(s);
d025f1e4
ZJS
1851 if (r < 0)
1852 return r;
1853
1854 s->udev = udev_new();
1855 if (!s->udev)
1856 return -ENOMEM;
1857
f9a810be 1858 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1859 if (!s->rate_limit)
1860 return -ENOMEM;
1861
e9174f29
LP
1862 r = cg_get_root_path(&s->cgroup_root);
1863 if (r < 0)
1864 return r;
1865
0c24bb23
LP
1866 server_cache_hostname(s);
1867 server_cache_boot_id(s);
1868 server_cache_machine_id(s);
1869
266a4700
FB
1870 s->runtime_storage.name = "Runtime journal";
1871 s->system_storage.name = "System journal";
1872
605405c6
ZJS
1873 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1874 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
266a4700
FB
1875 if (!s->runtime_storage.path || !s->system_storage.path)
1876 return -ENOMEM;
1877
e22aa3d3
LP
1878 (void) server_connect_notify(s);
1879
22e3a02b
LP
1880 (void) client_context_acquire_default(s);
1881
804ae586 1882 return system_journal_open(s, false);
d025f1e4
ZJS
1883}
1884
1885void server_maybe_append_tags(Server *s) {
349cc4a5 1886#if HAVE_GCRYPT
d025f1e4
ZJS
1887 JournalFile *f;
1888 Iterator i;
1889 usec_t n;
1890
1891 n = now(CLOCK_REALTIME);
1892
1893 if (s->system_journal)
1894 journal_file_maybe_append_tag(s->system_journal, n);
1895
43cf8388 1896 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1897 journal_file_maybe_append_tag(f, n);
1898#endif
1899}
1900
1901void server_done(Server *s) {
1902 JournalFile *f;
1903 assert(s);
1904
b58c888f
VC
1905 if (s->deferred_closes) {
1906 journal_file_close_set(s->deferred_closes);
1907 set_free(s->deferred_closes);
1908 }
1909
d025f1e4
ZJS
1910 while (s->stdout_streams)
1911 stdout_stream_free(s->stdout_streams);
1912
22e3a02b
LP
1913 client_context_flush_all(s);
1914
d025f1e4 1915 if (s->system_journal)
69a3a6fd 1916 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1917
1918 if (s->runtime_journal)
69a3a6fd 1919 (void) journal_file_close(s->runtime_journal);
d025f1e4 1920
43cf8388 1921 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1922 (void) journal_file_close(f);
d025f1e4 1923
43cf8388 1924 ordered_hashmap_free(s->user_journals);
d025f1e4 1925
f9a810be
LP
1926 sd_event_source_unref(s->syslog_event_source);
1927 sd_event_source_unref(s->native_event_source);
1928 sd_event_source_unref(s->stdout_event_source);
1929 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1930 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1931 sd_event_source_unref(s->sync_event_source);
1932 sd_event_source_unref(s->sigusr1_event_source);
1933 sd_event_source_unref(s->sigusr2_event_source);
1934 sd_event_source_unref(s->sigterm_event_source);
1935 sd_event_source_unref(s->sigint_event_source);
94b65516 1936 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1937 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1938 sd_event_source_unref(s->notify_event_source);
119e9655 1939 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1940 sd_event_unref(s->event);
d025f1e4 1941
03e334a1
LP
1942 safe_close(s->syslog_fd);
1943 safe_close(s->native_fd);
1944 safe_close(s->stdout_fd);
1945 safe_close(s->dev_kmsg_fd);
875c2e22 1946 safe_close(s->audit_fd);
03e334a1 1947 safe_close(s->hostname_fd);
e22aa3d3 1948 safe_close(s->notify_fd);
0c24bb23 1949
d025f1e4
ZJS
1950 if (s->rate_limit)
1951 journal_rate_limit_free(s->rate_limit);
1952
1953 if (s->kernel_seqnum)
1954 munmap(s->kernel_seqnum, sizeof(uint64_t));
1955
1956 free(s->buffer);
1957 free(s->tty_path);
e9174f29 1958 free(s->cgroup_root);
99d0966e 1959 free(s->hostname_field);
c6e9e16f
ZJS
1960 free(s->runtime_storage.path);
1961 free(s->system_storage.path);
d025f1e4
ZJS
1962
1963 if (s->mmap)
1964 mmap_cache_unref(s->mmap);
1965
3e044c49 1966 udev_unref(s->udev);
d025f1e4 1967}
8580d1f7
LP
1968
1969static const char* const storage_table[_STORAGE_MAX] = {
1970 [STORAGE_AUTO] = "auto",
1971 [STORAGE_VOLATILE] = "volatile",
1972 [STORAGE_PERSISTENT] = "persistent",
1973 [STORAGE_NONE] = "none"
1974};
1975
1976DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1977DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1978
1979static const char* const split_mode_table[_SPLIT_MAX] = {
1980 [SPLIT_LOGIN] = "login",
1981 [SPLIT_UID] = "uid",
1982 [SPLIT_NONE] = "none",
1983};
1984
1985DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1986DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
ec20fe5f
LP
1987
1988int config_parse_line_max(
1989 const char* unit,
1990 const char *filename,
1991 unsigned line,
1992 const char *section,
1993 unsigned section_line,
1994 const char *lvalue,
1995 int ltype,
1996 const char *rvalue,
1997 void *data,
1998 void *userdata) {
1999
2000 size_t *sz = data;
2001 int r;
2002
2003 assert(filename);
2004 assert(lvalue);
2005 assert(rvalue);
2006 assert(data);
2007
2008 if (isempty(rvalue))
2009 /* Empty assignment means default */
2010 *sz = DEFAULT_LINE_MAX;
2011 else {
2012 uint64_t v;
2013
2014 r = parse_size(rvalue, 1024, &v);
2015 if (r < 0) {
2016 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2017 return 0;
2018 }
2019
2020 if (v < 79) {
2021 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2022 * terminal size is 80ch, and it might make sense to break one character before the natural
2023 * line break would occur on that. */
2024 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2025 *sz = 79;
2026 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2027 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2028 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2029 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2030 * fail much earlier anyway. */
2031 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2032 *sz = SSIZE_MAX-1;
2033 } else
2034 *sz = (size_t) v;
2035 }
2036
2037 return 0;
2038}