]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
tree-wide: make parse_proc_cmdline() strip "rd." prefix automatically
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
863a5610 74#include "syslog-util.h"
d025f1e4 75
d025f1e4
ZJS
76#define USER_JOURNALS_MAX 1024
77
26687bf8 78#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
79#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 81#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 82
8580d1f7 83#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 84
e22aa3d3
LP
85#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
86
7a24f3bf
VC
87/* The period to insert between posting changes for coalescing */
88#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
e0ed6db9
FB
90static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
e0ed6db9
FB
94
95 assert(ret_used);
96 assert(ret_free);
97
266a4700 98 d = opendir(path);
e0ed6db9
FB
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
266a4700 101 errno, "Failed to open %s: %m", path);
e0ed6db9
FB
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
266a4700 104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
e0ed6db9
FB
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
266a4700 116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
e0ed6db9
FB
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127}
128
a0edc477
FB
129static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131}
132
57f443a6 133static int cache_space_refresh(Server *s, JournalStorage *storage) {
8580d1f7 134
7fd1b19b 135 _cleanup_closedir_ DIR *d = NULL;
23aba343 136 JournalStorageSpace *space;
266a4700 137 JournalMetrics *metrics;
23aba343 138 uint64_t vfs_used, vfs_avail, avail;
d025f1e4 139 usec_t ts;
e0ed6db9 140 int r;
d025f1e4 141
8580d1f7 142 assert(s);
266a4700 143
266a4700 144 metrics = &storage->metrics;
23aba343 145 space = &storage->space;
d025f1e4 146
8580d1f7 147 ts = now(CLOCK_MONOTONIC);
d025f1e4 148
57f443a6 149 if (space->timestamp + RECHECK_SPACE_USEC > ts)
d025f1e4
ZJS
150 return 0;
151
23aba343 152 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
e0ed6db9
FB
153 if (r < 0)
154 return r;
d025f1e4 155
23aba343
FB
156 space->vfs_used = vfs_used;
157 space->vfs_available = vfs_avail;
158
159 avail = LESS_BY(vfs_avail, metrics->keep_free);
160
23aba343
FB
161 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
162 space->available = LESS_BY(space->limit, vfs_used);
163 space->timestamp = ts;
8580d1f7
LP
164 return 1;
165}
166
3a19f215
FB
167static void patch_min_use(JournalStorage *storage) {
168 assert(storage);
169
170 /* Let's bump the min_use limit to the current usage on disk. We do
171 * this when starting up and first opening the journal files. This way
172 * sudden spikes in disk usage will not cause journald to vacuum files
173 * without bounds. Note that this means that only a restart of journald
174 * will make it reset this value. */
175
176 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
177}
178
179
180static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
266a4700 181 JournalStorage *js;
57f443a6 182 int r;
8580d1f7
LP
183
184 assert(s);
185
266a4700 186 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
57f443a6
FB
187
188 r = cache_space_refresh(s, js);
189 if (r >= 0) {
190 if (available)
191 *available = js->space.available;
192 if (limit)
193 *limit = js->space.limit;
194 }
195 return r;
d025f1e4
ZJS
196}
197
cba5629e
FB
198void server_space_usage_message(Server *s, JournalStorage *storage) {
199 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
200 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
201 JournalMetrics *metrics;
cba5629e
FB
202
203 assert(s);
204
205 if (!storage)
206 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
207
57f443a6 208 if (cache_space_refresh(s, storage) < 0)
cba5629e
FB
209 return;
210
211 metrics = &storage->metrics;
23aba343 212 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
cba5629e
FB
213 format_bytes(fb2, sizeof(fb2), metrics->max_use);
214 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
23aba343 215 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
cba5629e
FB
216 format_bytes(fb5, sizeof(fb5), storage->space.limit);
217 format_bytes(fb6, sizeof(fb6), storage->space.available);
218
219 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
220 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
221 storage->name, storage->path, fb1, fb5, fb6),
222 "JOURNAL_NAME=%s", storage->name,
223 "JOURNAL_PATH=%s", storage->path,
23aba343 224 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
cba5629e
FB
225 "CURRENT_USE_PRETTY=%s", fb1,
226 "MAX_USE=%"PRIu64, metrics->max_use,
227 "MAX_USE_PRETTY=%s", fb2,
228 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
229 "DISK_KEEP_FREE_PRETTY=%s", fb3,
23aba343 230 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
cba5629e
FB
231 "DISK_AVAILABLE_PRETTY=%s", fb4,
232 "LIMIT=%"PRIu64, storage->space.limit,
233 "LIMIT_PRETTY=%s", fb5,
234 "AVAILABLE=%"PRIu64, storage->space.available,
235 "AVAILABLE_PRETTY=%s", fb6,
236 NULL);
237}
238
5c3bde3f 239static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 240#ifdef HAVE_ACL
5c3bde3f 241 int r;
d025f1e4 242#endif
d025f1e4
ZJS
243 assert(f);
244
d025f1e4 245#ifdef HAVE_ACL
34c10968 246 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
247 return;
248
5c3bde3f
ZJS
249 r = add_acls_for_user(f->fd, uid);
250 if (r < 0)
251 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
252#endif
253}
254
7a24f3bf
VC
255static int open_journal(
256 Server *s,
257 bool reliably,
258 const char *fname,
259 int flags,
260 bool seal,
261 JournalMetrics *metrics,
7a24f3bf
VC
262 JournalFile **ret) {
263 int r;
e167d7fd 264 JournalFile *f;
7a24f3bf
VC
265
266 assert(s);
267 assert(fname);
268 assert(ret);
269
270 if (reliably)
b58c888f 271 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 272 else
5d1ce257 273 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
274 if (r < 0)
275 return r;
276
e167d7fd 277 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 278 if (r < 0) {
69a3a6fd 279 (void) journal_file_close(f);
7a24f3bf
VC
280 return r;
281 }
282
e167d7fd 283 *ret = f;
7a24f3bf
VC
284 return r;
285}
286
6431c7e2
VC
/* Tells whether the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
290
105bdb46 291static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 292 bool flushed = false;
105bdb46
VC
293 const char *fn;
294 int r = 0;
295
296 if (!s->system_journal &&
297 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 298 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
299
300 /* If in auto mode: first try to create the machine
301 * path, but not the prefix.
302 *
303 * If in persistent mode: create /var/log/journal and
304 * the machine path */
305
306 if (s->storage == STORAGE_PERSISTENT)
307 (void) mkdir_p("/var/log/journal/", 0755);
308
266a4700 309 (void) mkdir(s->system_storage.path, 0755);
105bdb46 310
266a4700
FB
311 fn = strjoina(s->system_storage.path, "/system.journal");
312 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
105bdb46
VC
313 if (r >= 0) {
314 server_add_acls(s->system_journal, 0);
57f443a6 315 (void) cache_space_refresh(s, &s->system_storage);
3a19f215 316 patch_min_use(&s->system_storage);
105bdb46
VC
317 } else if (r < 0) {
318 if (r != -ENOENT && r != -EROFS)
319 log_warning_errno(r, "Failed to open system journal: %m");
320
321 r = 0;
322 }
929eeb54
VC
323
324 /* If the runtime journal is open, and we're post-flush, we're
325 * recovering from a failed system journal rotate (ENOSPC)
326 * for which the runtime journal was reopened.
327 *
328 * Perform an implicit flush to var, leaving the runtime
329 * journal closed, now that the system journal is back.
330 */
331 if (s->runtime_journal && flushed)
332 (void) server_flush_to_var(s);
105bdb46
VC
333 }
334
335 if (!s->runtime_journal &&
336 (s->storage != STORAGE_NONE)) {
337
266a4700 338 fn = strjoina(s->runtime_storage.path, "/system.journal");
105bdb46
VC
339
340 if (s->system_journal) {
341
342 /* Try to open the runtime journal, but only
343 * if it already exists, so that we can flush
344 * it into the system journal */
345
266a4700 346 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
347 if (r < 0) {
348 if (r != -ENOENT)
349 log_warning_errno(r, "Failed to open runtime journal: %m");
350
351 r = 0;
352 }
353
354 } else {
355
356 /* OK, we really need the runtime journal, so create
357 * it if necessary. */
358
359 (void) mkdir("/run/log", 0755);
360 (void) mkdir("/run/log/journal", 0755);
361 (void) mkdir_parents(fn, 0750);
362
266a4700 363 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
105bdb46
VC
364 if (r < 0)
365 return log_error_errno(r, "Failed to open runtime journal: %m");
366 }
367
368 if (s->runtime_journal) {
369 server_add_acls(s->runtime_journal, 0);
57f443a6 370 (void) cache_space_refresh(s, &s->runtime_storage);
3a19f215 371 patch_min_use(&s->runtime_storage);
105bdb46
VC
372 }
373 }
374
375 return r;
376}
377
d025f1e4 378static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 379 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
380 int r;
381 JournalFile *f;
382 sd_id128_t machine;
383
384 assert(s);
385
105bdb46
VC
386 /* A rotate that fails to create the new journal (ENOSPC) leaves the
387 * rotated journal as NULL. Unless we revisit opening, even after
388 * space is made available we'll continue to return NULL indefinitely.
389 *
390 * system_journal_open() is a noop if the journals are already open, so
391 * we can just call it here to recover from failed rotates (or anything
392 * else that's left the journals as NULL).
393 *
394 * Fixes https://github.com/systemd/systemd/issues/3968 */
395 (void) system_journal_open(s, false);
396
d025f1e4
ZJS
397 /* We split up user logs only on /var, not on /run. If the
398 * runtime file is open, we write to it exclusively, in order
399 * to guarantee proper order as soon as we flush /run to
400 * /var and close the runtime file. */
401
402 if (s->runtime_journal)
403 return s->runtime_journal;
404
61755fda 405 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
406 return s->system_journal;
407
408 r = sd_id128_get_machine(&machine);
409 if (r < 0)
410 return s->system_journal;
411
4a0b58c4 412 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
413 if (f)
414 return f;
415
de0671ee
ZJS
416 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
417 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
418 return s->system_journal;
419
43cf8388 420 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 421 /* Too many open? Then let's close one */
43cf8388 422 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 423 assert(f);
69a3a6fd 424 (void) journal_file_close(f);
d025f1e4
ZJS
425 }
426
266a4700 427 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
d025f1e4
ZJS
428 if (r < 0)
429 return s->system_journal;
430
5c3bde3f 431 server_add_acls(f, uid);
d025f1e4 432
4a0b58c4 433 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 434 if (r < 0) {
69a3a6fd 435 (void) journal_file_close(f);
d025f1e4
ZJS
436 return s->system_journal;
437 }
438
439 return f;
440}
441
ea69bd41
LP
442static int do_rotate(
443 Server *s,
444 JournalFile **f,
445 const char* name,
446 bool seal,
447 uint32_t uid) {
448
fc55baee
ZJS
449 int r;
450 assert(s);
451
452 if (!*f)
453 return -EINVAL;
454
b58c888f 455 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
456 if (r < 0)
457 if (*f)
ea69bd41 458 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 459 else
ea69bd41 460 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 461 else
5c3bde3f 462 server_add_acls(*f, uid);
2678031a 463
fc55baee
ZJS
464 return r;
465}
466
d025f1e4
ZJS
467void server_rotate(Server *s) {
468 JournalFile *f;
469 void *k;
470 Iterator i;
471 int r;
472
473 log_debug("Rotating...");
474
8580d1f7
LP
475 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
476 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 477
43cf8388 478 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 479 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 480 if (r >= 0)
43cf8388 481 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
482 else if (!f)
483 /* Old file has been closed and deallocated */
43cf8388 484 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 485 }
b58c888f
VC
486
487 /* Perform any deferred closes which aren't still offlining. */
488 SET_FOREACH(f, s->deferred_closes, i)
489 if (!journal_file_is_offlining(f)) {
490 (void) set_remove(s->deferred_closes, f);
491 (void) journal_file_close(f);
492 }
d025f1e4
ZJS
493}
494
26687bf8
OS
495void server_sync(Server *s) {
496 JournalFile *f;
26687bf8
OS
497 Iterator i;
498 int r;
499
26687bf8 500 if (s->system_journal) {
ac2e41f5 501 r = journal_file_set_offline(s->system_journal, false);
26687bf8 502 if (r < 0)
65089b82 503 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
504 }
505
65c1d46b 506 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 507 r = journal_file_set_offline(f, false);
26687bf8 508 if (r < 0)
65089b82 509 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
510 }
511
f9a810be
LP
512 if (s->sync_event_source) {
513 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
514 if (r < 0)
da927ba9 515 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 516 }
26687bf8
OS
517
518 s->sync_scheduled = false;
519}
520
3a19f215 521static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
ea69bd41 522
63c8666b
ZJS
523 int r;
524
8580d1f7 525 assert(s);
266a4700 526 assert(storage);
8580d1f7 527
57f443a6 528 (void) cache_space_refresh(s, storage);
18e758bf
FB
529
530 if (verbose)
531 server_space_usage_message(s, storage);
8580d1f7 532
57f443a6
FB
533 r = journal_directory_vacuum(storage->path, storage->space.limit,
534 storage->metrics.n_max_files, s->max_retention_usec,
535 &s->oldest_file_usec, verbose);
63c8666b 536 if (r < 0 && r != -ENOENT)
266a4700
FB
537 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
538
a0edc477 539 cache_space_invalidate(&storage->space);
63c8666b
ZJS
540}
541
3a19f215 542int server_vacuum(Server *s, bool verbose) {
8580d1f7 543 assert(s);
d025f1e4
ZJS
544
545 log_debug("Vacuuming...");
546
547 s->oldest_file_usec = 0;
548
266a4700 549 if (s->system_journal)
3a19f215 550 do_vacuum(s, &s->system_storage, verbose);
266a4700 551 if (s->runtime_journal)
3a19f215 552 do_vacuum(s, &s->runtime_storage, verbose);
d025f1e4 553
8580d1f7 554 return 0;
d025f1e4
ZJS
555}
556
0c24bb23
LP
557static void server_cache_machine_id(Server *s) {
558 sd_id128_t id;
559 int r;
560
561 assert(s);
562
563 r = sd_id128_get_machine(&id);
564 if (r < 0)
565 return;
566
567 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
568}
569
570static void server_cache_boot_id(Server *s) {
571 sd_id128_t id;
572 int r;
573
574 assert(s);
575
576 r = sd_id128_get_boot(&id);
577 if (r < 0)
578 return;
579
580 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
581}
582
583static void server_cache_hostname(Server *s) {
584 _cleanup_free_ char *t = NULL;
585 char *x;
586
587 assert(s);
588
589 t = gethostname_malloc();
590 if (!t)
591 return;
592
593 x = strappend("_HOSTNAME=", t);
594 if (!x)
595 return;
596
597 free(s->hostname_field);
598 s->hostname_field = x;
599}
600
8531ae70 601static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5 602 switch(r) {
ae739cc1 603
6e1045e5
ZJS
604 case -E2BIG: /* Hit configured limit */
605 case -EFBIG: /* Hit fs limit */
606 case -EDQUOT: /* Quota limit hit */
607 case -ENOSPC: /* Disk full */
d025f1e4 608 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5 609 return true;
ae739cc1 610
6e1045e5
ZJS
611 case -EIO: /* I/O error of some kind (mmap) */
612 log_warning("%s: IO error, rotating.", f->path);
613 return true;
ae739cc1 614
6e1045e5 615 case -EHOSTDOWN: /* Other machine */
d025f1e4 616 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5 617 return true;
ae739cc1 618
6e1045e5 619 case -EBUSY: /* Unclean shutdown */
d025f1e4 620 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5 621 return true;
ae739cc1 622
6e1045e5 623 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 624 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5 625 return true;
ae739cc1 626
6e1045e5
ZJS
627 case -EBADMSG: /* Corrupted */
628 case -ENODATA: /* Truncated */
629 case -ESHUTDOWN: /* Already archived */
d025f1e4 630 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5 631 return true;
ae739cc1 632
6e1045e5 633 case -EIDRM: /* Journal file has been deleted */
2678031a 634 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5 635 return true;
ae739cc1
LP
636
637 case -ETXTBSY: /* Journal file is from the future */
c1a9199e 638 log_warning("%s: Journal file is from the future, rotating.", f->path);
ae739cc1
LP
639 return true;
640
6e1045e5 641 default:
d025f1e4 642 return false;
6e1045e5 643 }
d025f1e4
ZJS
644}
645
d07f7b9e 646static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
7c070017 647 bool vacuumed = false, rotate = false;
0f972d66 648 struct dual_timestamp ts;
d025f1e4 649 JournalFile *f;
d025f1e4
ZJS
650 int r;
651
652 assert(s);
653 assert(iovec);
654 assert(n > 0);
655
0f972d66
LP
656 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
657 * the source time, and not even the time the event was originally seen, but instead simply the time we started
658 * processing it, as we want strictly linear ordering in what we write out.) */
659 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
660 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
661
7c070017
LP
662 if (ts.realtime < s->last_realtime_clock) {
663 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
664 * regular operation. However, when it does happen, then we should make sure that we start fresh files
665 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
666 * bisection works correctly. */
d025f1e4 667
7c070017
LP
668 log_debug("Time jumped backwards, rotating.");
669 rotate = true;
670 } else {
671
672 f = find_journal(s, uid);
673 if (!f)
674 return;
675
676 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
677 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
678 rotate = true;
679 }
680 }
d025f1e4 681
7c070017 682 if (rotate) {
d025f1e4 683 server_rotate(s);
3a19f215 684 server_vacuum(s, false);
d025f1e4
ZJS
685 vacuumed = true;
686
687 f = find_journal(s, uid);
688 if (!f)
689 return;
690 }
691
7c070017
LP
692 s->last_realtime_clock = ts.realtime;
693
0f972d66 694 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 695 if (r >= 0) {
d07f7b9e 696 server_schedule_sync(s, priority);
d025f1e4 697 return;
26687bf8 698 }
d025f1e4
ZJS
699
700 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 701 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
702 return;
703 }
704
705 server_rotate(s);
3a19f215 706 server_vacuum(s, false);
d025f1e4
ZJS
707
708 f = find_journal(s, uid);
709 if (!f)
710 return;
711
712 log_debug("Retrying write.");
0f972d66 713 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
714 if (r < 0)
715 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
716 else
d07f7b9e 717 server_schedule_sync(s, priority);
d025f1e4
ZJS
718}
719
4b58153d
LP
720static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
721 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
722 char *copy, ids[SD_ID128_STRING_MAX];
723 int r;
724
725 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
726 * on the cgroup path. */
727
728 r = cg_slice_to_path(slice, &slice_path);
729 if (r < 0)
730 return r;
731
732 escaped = cg_escape(unit);
733 if (!escaped)
734 return -ENOMEM;
735
736 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
737 if (!p)
738 return -ENOMEM;
739
740 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
741 if (r < 0)
742 return r;
743 if (r != 32)
744 return -EINVAL;
745 ids[32] = 0;
746
747 if (!id128_is_valid(ids))
748 return -EINVAL;
749
750 copy = strdup(ids);
751 if (!copy)
752 return -ENOMEM;
753
754 *ret = copy;
755 return 0;
756}
757
d025f1e4
ZJS
758static void dispatch_message_real(
759 Server *s,
760 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
761 const struct ucred *ucred,
762 const struct timeval *tv,
d025f1e4 763 const char *label, size_t label_len,
968f3196 764 const char *unit_id,
d07f7b9e 765 int priority,
968f3196 766 pid_t object_pid) {
d025f1e4 767
968f3196 768 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
769 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
770 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
771 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 772 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
773 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
774 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
775 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
776 uid_t object_uid;
777 gid_t object_gid;
968f3196 778 char *x;
d025f1e4 779 int r;
ae018d9b 780 char *t, *c;
82499507
LP
781 uid_t realuid = 0, owner = 0, journal_uid;
782 bool owner_valid = false;
ae018d9b 783#ifdef HAVE_AUDIT
968f3196
ZJS
784 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
786 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
787 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
788
789 uint32_t audit;
790 uid_t loginuid;
791#endif
d025f1e4
ZJS
792
793 assert(s);
794 assert(iovec);
795 assert(n > 0);
d473176a 796 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
797
798 if (ucred) {
d025f1e4
ZJS
799 realuid = ucred->uid;
800
de0671ee 801 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 802 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 803
de0671ee 804 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 805 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 806
de0671ee 807 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 808 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
809
810 r = get_process_comm(ucred->pid, &t);
811 if (r >= 0) {
63c372cb 812 x = strjoina("_COMM=", t);
d025f1e4 813 free(t);
968f3196 814 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
815 }
816
817 r = get_process_exe(ucred->pid, &t);
818 if (r >= 0) {
63c372cb 819 x = strjoina("_EXE=", t);
d025f1e4 820 free(t);
968f3196 821 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
822 }
823
9bdbc2e2 824 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 825 if (r >= 0) {
63c372cb 826 x = strjoina("_CMDLINE=", t);
d025f1e4 827 free(t);
3a832116
SL
828 IOVEC_SET_STRING(iovec[n++], x);
829 }
830
831 r = get_process_capeff(ucred->pid, &t);
832 if (r >= 0) {
63c372cb 833 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 834 free(t);
968f3196 835 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
836 }
837
0a20e3c1 838#ifdef HAVE_AUDIT
d025f1e4 839 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 840 if (r >= 0) {
de0671ee 841 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
842 IOVEC_SET_STRING(iovec[n++], audit_session);
843 }
d025f1e4
ZJS
844
845 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 846 if (r >= 0) {
de0671ee 847 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 848 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 849 }
ae018d9b 850#endif
d025f1e4 851
e9174f29 852 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 853 if (r >= 0) {
4b58153d 854 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
855 char *session = NULL;
856
63c372cb 857 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 858 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 859
ae018d9b
LP
860 r = cg_path_get_session(c, &t);
861 if (r >= 0) {
63c372cb 862 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 863 free(t);
d025f1e4 864 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
865 }
866
867 if (cg_path_get_owner_uid(c, &owner) >= 0) {
868 owner_valid = true;
d025f1e4 869
de0671ee 870 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 871 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 872 }
d025f1e4 873
4b58153d
LP
874 if (cg_path_get_unit(c, &raw_unit) >= 0) {
875 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
876 IOVEC_SET_STRING(iovec[n++], x);
877 } else if (unit_id && !session) {
63c372cb 878 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
879 IOVEC_SET_STRING(iovec[n++], x);
880 }
881
882 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 883 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 884 free(t);
968f3196 885 IOVEC_SET_STRING(iovec[n++], x);
19cace37 886 } else if (unit_id && session) {
63c372cb 887 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
888 IOVEC_SET_STRING(iovec[n++], x);
889 }
ae018d9b 890
4b58153d
LP
891 if (cg_path_get_slice(c, &raw_slice) >= 0) {
892 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
893 IOVEC_SET_STRING(iovec[n++], x);
894 }
895
d473176a
LP
896 if (cg_path_get_user_slice(c, &t) >= 0) {
897 x = strjoina("_SYSTEMD_USER_SLICE=", t);
898 free(t);
899 IOVEC_SET_STRING(iovec[n++], x);
900 }
901
4b58153d
LP
902 if (raw_slice && raw_unit) {
903 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
904 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
905 free(t);
906 IOVEC_SET_STRING(iovec[n++], x);
907 }
908 }
909
ae018d9b 910 free(c);
2d43b190 911 } else if (unit_id) {
63c372cb 912 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 913 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 914 }
d025f1e4 915
d025f1e4 916#ifdef HAVE_SELINUX
6355e756 917 if (mac_selinux_have()) {
d682b3a7 918 if (label) {
f8294e41 919 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 920
d682b3a7
LP
921 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
922 IOVEC_SET_STRING(iovec[n++], x);
923 } else {
2ed96880 924 char *con;
d025f1e4 925
d682b3a7 926 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 927 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 928
d682b3a7
LP
929 freecon(con);
930 IOVEC_SET_STRING(iovec[n++], x);
931 }
d025f1e4
ZJS
932 }
933 }
934#endif
935 }
968f3196
ZJS
936 assert(n <= m);
937
938 if (object_pid) {
939 r = get_process_uid(object_pid, &object_uid);
940 if (r >= 0) {
de0671ee 941 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
942 IOVEC_SET_STRING(iovec[n++], o_uid);
943 }
944
945 r = get_process_gid(object_pid, &object_gid);
946 if (r >= 0) {
de0671ee 947 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
948 IOVEC_SET_STRING(iovec[n++], o_gid);
949 }
950
951 r = get_process_comm(object_pid, &t);
952 if (r >= 0) {
63c372cb 953 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
954 free(t);
955 IOVEC_SET_STRING(iovec[n++], x);
956 }
957
958 r = get_process_exe(object_pid, &t);
959 if (r >= 0) {
63c372cb 960 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
961 free(t);
962 IOVEC_SET_STRING(iovec[n++], x);
963 }
964
965 r = get_process_cmdline(object_pid, 0, false, &t);
966 if (r >= 0) {
63c372cb 967 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
968 free(t);
969 IOVEC_SET_STRING(iovec[n++], x);
970 }
971
972#ifdef HAVE_AUDIT
973 r = audit_session_from_pid(object_pid, &audit);
974 if (r >= 0) {
de0671ee 975 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
976 IOVEC_SET_STRING(iovec[n++], o_audit_session);
977 }
978
979 r = audit_loginuid_from_pid(object_pid, &loginuid);
980 if (r >= 0) {
de0671ee 981 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
982 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
983 }
984#endif
985
e9174f29 986 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 987 if (r >= 0) {
63c372cb 988 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
989 IOVEC_SET_STRING(iovec[n++], x);
990
991 r = cg_path_get_session(c, &t);
992 if (r >= 0) {
63c372cb 993 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
994 free(t);
995 IOVEC_SET_STRING(iovec[n++], x);
996 }
997
998 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 999 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
1000 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
1001 }
1002
1003 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 1004 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 1005 free(t);
19cace37
LP
1006 IOVEC_SET_STRING(iovec[n++], x);
1007 }
1008
1009 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 1010 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 1011 free(t);
968f3196 1012 IOVEC_SET_STRING(iovec[n++], x);
19cace37 1013 }
968f3196 1014
d473176a
LP
1015 if (cg_path_get_slice(c, &t) >= 0) {
1016 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1017 free(t);
1018 IOVEC_SET_STRING(iovec[n++], x);
1019 }
1020
1021 if (cg_path_get_user_slice(c, &t) >= 0) {
1022 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1023 free(t);
1024 IOVEC_SET_STRING(iovec[n++], x);
1025 }
1026
968f3196
ZJS
1027 free(c);
1028 }
1029 }
1030 assert(n <= m);
d025f1e4
ZJS
1031
1032 if (tv) {
398a50cd 1033 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1034 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1035 }
1036
1037 /* Note that strictly speaking storing the boot id here is
1038 * redundant since the entry includes this in-line
1039 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1040 if (!isempty(s->boot_id_field))
1041 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1042
0c24bb23
LP
1043 if (!isempty(s->machine_id_field))
1044 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1045
0c24bb23
LP
1046 if (!isempty(s->hostname_field))
1047 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1048
1049 assert(n <= m);
1050
da499392 1051 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1052 /* Split up strictly by any UID */
759c945a 1053 journal_uid = realuid;
82499507 1054 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1055 /* Split up by login UIDs. We do this only if the
1056 * realuid is not root, in order not to accidentally
1057 * leak privileged information to the user that is
1058 * logged by a privileged process that is part of an
7517e174 1059 * unprivileged session. */
8a0889df 1060 journal_uid = owner;
da499392
KS
1061 else
1062 journal_uid = 0;
759c945a 1063
d07f7b9e 1064 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1065}
1066
1067void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1068 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1069 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1070 unsigned n = 0, m;
32917e33 1071 int r;
d025f1e4 1072 va_list ap;
b92bea5d 1073 struct ucred ucred = {};
d025f1e4
ZJS
1074
1075 assert(s);
1076 assert(format);
1077
4850d39a 1078 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1079 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1080 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1081
d025f1e4 1082 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1083 assert_cc(6 == LOG_INFO);
32917e33 1084 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1085
3bbaff3e 1086 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1087 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1088 IOVEC_SET_STRING(iovec[n++], mid);
1089 }
1090
8a03c9ef
ZJS
1091 m = n;
1092
1093 va_start(ap, format);
32917e33
ZJS
1094 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1095 /* Error handling below */
8a03c9ef
ZJS
1096 va_end(ap);
1097
d025f1e4
ZJS
1098 ucred.pid = getpid();
1099 ucred.uid = getuid();
1100 ucred.gid = getgid();
1101
32917e33
ZJS
1102 if (r >= 0)
1103 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1104
1105 while (m < n)
1106 free(iovec[m++].iov_base);
32917e33
ZJS
1107
1108 if (r < 0) {
1109 /* We failed to format the message. Emit a warning instead. */
1110 char buf[LINE_MAX];
1111
1112 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1113
1114 n = 3;
1115 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1116 IOVEC_SET_STRING(iovec[n++], buf);
1117 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1118 }
d025f1e4
ZJS
1119}
1120
1121void server_dispatch_message(
1122 Server *s,
1123 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1124 const struct ucred *ucred,
1125 const struct timeval *tv,
d025f1e4
ZJS
1126 const char *label, size_t label_len,
1127 const char *unit_id,
968f3196
ZJS
1128 int priority,
1129 pid_t object_pid) {
d025f1e4 1130
7027ff61 1131 int rl, r;
7fd1b19b 1132 _cleanup_free_ char *path = NULL;
8580d1f7 1133 uint64_t available = 0;
db91ea32 1134 char *c;
d025f1e4
ZJS
1135
1136 assert(s);
1137 assert(iovec || n == 0);
1138
1139 if (n == 0)
1140 return;
1141
1142 if (LOG_PRI(priority) > s->max_level_store)
1143 return;
1144
2f5df74a
HHPF
1145 /* Stop early in case the information will not be stored
1146 * in a journal. */
1147 if (s->storage == STORAGE_NONE)
1148 return;
1149
d025f1e4
ZJS
1150 if (!ucred)
1151 goto finish;
1152
e9174f29 1153 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1154 if (r < 0)
d025f1e4
ZJS
1155 goto finish;
1156
1157 /* example: /user/lennart/3/foobar
1158 * /system/dbus.service/foobar
1159 *
1160 * So let's cut of everything past the third /, since that is
1161 * where user directories start */
1162
1163 c = strchr(path, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c) {
1167 c = strchr(c+1, '/');
1168 if (c)
1169 *c = 0;
1170 }
1171 }
1172
3a19f215 1173 (void) determine_space(s, &available, NULL);
8580d1f7 1174 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1175 if (rl == 0)
d025f1e4 1176 return;
d025f1e4
ZJS
1177
1178 /* Write a suppression message if we suppressed something */
1179 if (rl > 1)
db91ea32 1180 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1181 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1182 NULL);
d025f1e4
ZJS
1183
1184finish:
d07f7b9e 1185 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1186}
1187
d025f1e4 1188int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1189 sd_id128_t machine;
1190 sd_journal *j = NULL;
fbb63411
LP
1191 char ts[FORMAT_TIMESPAN_MAX];
1192 usec_t start;
1193 unsigned n = 0;
1194 int r;
d025f1e4
ZJS
1195
1196 assert(s);
1197
1198 if (s->storage != STORAGE_AUTO &&
1199 s->storage != STORAGE_PERSISTENT)
1200 return 0;
1201
1202 if (!s->runtime_journal)
1203 return 0;
1204
8580d1f7 1205 (void) system_journal_open(s, true);
d025f1e4
ZJS
1206
1207 if (!s->system_journal)
1208 return 0;
1209
1210 log_debug("Flushing to /var...");
1211
fbb63411
LP
1212 start = now(CLOCK_MONOTONIC);
1213
d025f1e4 1214 r = sd_id128_get_machine(&machine);
00a16861 1215 if (r < 0)
d025f1e4 1216 return r;
d025f1e4
ZJS
1217
1218 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1219 if (r < 0)
1220 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1221
93b73b06
LP
1222 sd_journal_set_data_threshold(j, 0);
1223
d025f1e4
ZJS
1224 SD_JOURNAL_FOREACH(j) {
1225 Object *o = NULL;
1226 JournalFile *f;
1227
1228 f = j->current_file;
1229 assert(f && f->current_offset > 0);
1230
fbb63411
LP
1231 n++;
1232
d025f1e4
ZJS
1233 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1234 if (r < 0) {
da927ba9 1235 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1236 goto finish;
1237 }
1238
1239 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1240 if (r >= 0)
1241 continue;
1242
1243 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1244 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1245 goto finish;
1246 }
1247
1248 server_rotate(s);
3a19f215 1249 server_vacuum(s, false);
d025f1e4 1250
253f59df
LP
1251 if (!s->system_journal) {
1252 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1253 r = -EIO;
1254 goto finish;
1255 }
1256
d025f1e4
ZJS
1257 log_debug("Retrying write.");
1258 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1259 if (r < 0) {
da927ba9 1260 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1261 goto finish;
1262 }
1263 }
1264
804ae586
LP
1265 r = 0;
1266
d025f1e4
ZJS
1267finish:
1268 journal_file_post_change(s->system_journal);
1269
804ae586 1270 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1271
1272 if (r >= 0)
c6878637 1273 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1274
763c7aa2 1275 sd_journal_close(j);
d025f1e4 1276
8a03c9ef
ZJS
1277 server_driver_message(s, SD_ID128_NULL,
1278 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1279 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1280 n),
1281 NULL);
fbb63411 1282
d025f1e4
ZJS
1283 return r;
1284}
1285
8531ae70 1286int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1287 Server *s = userdata;
a315ac4e
LP
1288 struct ucred *ucred = NULL;
1289 struct timeval *tv = NULL;
1290 struct cmsghdr *cmsg;
1291 char *label = NULL;
1292 size_t label_len = 0, m;
1293 struct iovec iovec;
1294 ssize_t n;
1295 int *fds = NULL, v = 0;
1296 unsigned n_fds = 0;
1297
1298 union {
1299 struct cmsghdr cmsghdr;
1300
1301 /* We use NAME_MAX space for the SELinux label
1302 * here. The kernel currently enforces no
1303 * limit, but according to suggestions from
1304 * the SELinux people this will change and it
1305 * will probably be identical to NAME_MAX. For
1306 * now we use that, but this should be updated
1307 * one day when the final limit is known. */
1308 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1309 CMSG_SPACE(sizeof(struct timeval)) +
1310 CMSG_SPACE(sizeof(int)) + /* fd */
1311 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1312 } control = {};
1313
1314 union sockaddr_union sa = {};
1315
1316 struct msghdr msghdr = {
1317 .msg_iov = &iovec,
1318 .msg_iovlen = 1,
1319 .msg_control = &control,
1320 .msg_controllen = sizeof(control),
1321 .msg_name = &sa,
1322 .msg_namelen = sizeof(sa),
1323 };
f9a810be 1324
d025f1e4 1325 assert(s);
875c2e22 1326 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1327
1328 if (revents != EPOLLIN) {
1329 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1330 return -EIO;
1331 }
1332
a315ac4e
LP
1333 /* Try to get the right size, if we can. (Not all
1334 * sockets support SIOCINQ, hence we just try, but
1335 * don't rely on it. */
1336 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1337
a315ac4e
LP
1338 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1339 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1340 (size_t) LINE_MAX,
1341 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1342
a315ac4e
LP
1343 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1344 return log_oom();
875c2e22 1345
a315ac4e
LP
1346 iovec.iov_base = s->buffer;
1347 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1348
a315ac4e
LP
1349 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1350 if (n < 0) {
1351 if (errno == EINTR || errno == EAGAIN)
1352 return 0;
875c2e22 1353
a315ac4e
LP
1354 return log_error_errno(errno, "recvmsg() failed: %m");
1355 }
875c2e22 1356
a315ac4e
LP
1357 CMSG_FOREACH(cmsg, &msghdr) {
1358
1359 if (cmsg->cmsg_level == SOL_SOCKET &&
1360 cmsg->cmsg_type == SCM_CREDENTIALS &&
1361 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1362 ucred = (struct ucred*) CMSG_DATA(cmsg);
1363 else if (cmsg->cmsg_level == SOL_SOCKET &&
1364 cmsg->cmsg_type == SCM_SECURITY) {
1365 label = (char*) CMSG_DATA(cmsg);
1366 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1367 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1368 cmsg->cmsg_type == SO_TIMESTAMP &&
1369 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1370 tv = (struct timeval*) CMSG_DATA(cmsg);
1371 else if (cmsg->cmsg_level == SOL_SOCKET &&
1372 cmsg->cmsg_type == SCM_RIGHTS) {
1373 fds = (int*) CMSG_DATA(cmsg);
1374 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1375 }
a315ac4e 1376 }
d025f1e4 1377
a315ac4e
LP
1378 /* And a trailing NUL, just in case */
1379 s->buffer[n] = 0;
1380
1381 if (fd == s->syslog_fd) {
1382 if (n > 0 && n_fds == 0)
1383 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1384 else if (n_fds > 0)
1385 log_warning("Got file descriptors via syslog socket. Ignoring.");
1386
1387 } else if (fd == s->native_fd) {
1388 if (n > 0 && n_fds == 0)
1389 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1390 else if (n == 0 && n_fds == 1)
1391 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1392 else if (n_fds > 0)
1393 log_warning("Got too many file descriptors via native socket. Ignoring.");
1394
1395 } else {
1396 assert(fd == s->audit_fd);
1397
1398 if (n > 0 && n_fds == 0)
1399 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1400 else if (n_fds > 0)
1401 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1402 }
a315ac4e
LP
1403
1404 close_many(fds, n_fds);
1405 return 0;
f9a810be 1406}
d025f1e4 1407
f9a810be
LP
1408static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1409 Server *s = userdata;
33d52ab9 1410 int r;
d025f1e4 1411
f9a810be 1412 assert(s);
d025f1e4 1413
94b65516 1414 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1415
929eeb54 1416 (void) server_flush_to_var(s);
f9a810be 1417 server_sync(s);
3a19f215 1418 server_vacuum(s, false);
d025f1e4 1419
33d52ab9
LP
1420 r = touch("/run/systemd/journal/flushed");
1421 if (r < 0)
1422 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1423
18e758bf 1424 server_space_usage_message(s, NULL);
f9a810be
LP
1425 return 0;
1426}
d025f1e4 1427
f9a810be
LP
1428static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1429 Server *s = userdata;
33d52ab9 1430 int r;
d025f1e4 1431
f9a810be 1432 assert(s);
d025f1e4 1433
94b65516 1434 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1435 server_rotate(s);
3a19f215
FB
1436 server_vacuum(s, true);
1437
1438 if (s->system_journal)
1439 patch_min_use(&s->system_storage);
1440 if (s->runtime_journal)
1441 patch_min_use(&s->runtime_storage);
d025f1e4 1442
dbd6e31c 1443 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1444 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1445 if (r < 0)
1446 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1447
f9a810be
LP
1448 return 0;
1449}
d025f1e4 1450
f9a810be
LP
1451static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1452 Server *s = userdata;
d025f1e4 1453
f9a810be 1454 assert(s);
d025f1e4 1455
4daf54a8 1456 log_received_signal(LOG_INFO, si);
d025f1e4 1457
6203e07a 1458 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1459 return 0;
1460}
1461
94b65516
LP
1462static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1463 Server *s = userdata;
33d52ab9 1464 int r;
94b65516
LP
1465
1466 assert(s);
1467
1468 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1469
1470 server_sync(s);
1471
1472 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1473 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1474 if (r < 0)
1475 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1476
1477 return 0;
1478}
1479
f9a810be 1480static int setup_signals(Server *s) {
f9a810be 1481 int r;
d025f1e4
ZJS
1482
1483 assert(s);
1484
94b65516 1485 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1486
151b9b96 1487 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1488 if (r < 0)
1489 return r;
1490
151b9b96 1491 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1492 if (r < 0)
1493 return r;
d025f1e4 1494
151b9b96 1495 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1496 if (r < 0)
1497 return r;
d025f1e4 1498
b374689c
LP
1499 /* Let's process SIGTERM late, so that we flush all queued
1500 * messages to disk before we exit */
1501 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1502 if (r < 0)
1503 return r;
1504
1505 /* When journald is invoked on the terminal (when debugging),
1506 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1507 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1508 if (r < 0)
1509 return r;
d025f1e4 1510
b374689c
LP
1511 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1512 if (r < 0)
1513 return r;
1514
94b65516
LP
1515 /* SIGRTMIN+1 causes an immediate sync. We process this very
1516 * late, so that everything else queued at this point is
1517 * really written to disk. Clients can watch
1518 * /run/systemd/journal/synced with inotify until its mtime
1519 * changes to see when a sync happened. */
1520 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1521 if (r < 0)
1522 return r;
1523
1524 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1525 if (r < 0)
1526 return r;
1527
d025f1e4
ZJS
1528 return 0;
1529}
1530
5707ecf3
ZJS
1531static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1532 Server *s = data;
74df0fca 1533 int r;
d025f1e4 1534
5707ecf3 1535 assert(s);
d025f1e4 1536
5707ecf3
ZJS
1537 if (streq(key, "systemd.journald.forward_to_syslog")) {
1538 r = value ? parse_boolean(value) : true;
d581d9d9 1539 if (r < 0)
5707ecf3
ZJS
1540 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1541 else
1542 s->forward_to_syslog = r;
1543 } else if (streq(key, "systemd.journald.forward_to_kmsg")) {
1544 r = value ? parse_boolean(value) : true;
1545 if (r < 0)
1546 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1547 else
1548 s->forward_to_kmsg = r;
1549 } else if (streq(key, "systemd.journald.forward_to_console")) {
1550 r = value ? parse_boolean(value) : true;
1551 if (r < 0)
1552 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1553 else
1554 s->forward_to_console = r;
1555 } else if (streq(key, "systemd.journald.forward_to_wall")) {
1556 r = value ? parse_boolean(value) : true;
1557 if (r < 0)
1558 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1559 else
1560 s->forward_to_wall = r;
1561 } else if (streq(key, "systemd.journald.max_level_console") && value) {
1562 r = log_level_from_string(value);
1563 if (r < 0)
1564 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1565 else
1566 s->max_level_console = r;
1567 } else if (streq(key, "systemd.journald.max_level_store") && value) {
1568 r = log_level_from_string(value);
1569 if (r < 0)
1570 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1571 else
1572 s->max_level_store = r;
1573 } else if (streq(key, "systemd.journald.max_level_syslog") && value) {
1574 r = log_level_from_string(value);
1575 if (r < 0)
1576 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1577 else
1578 s->max_level_syslog = r;
1579 } else if (streq(key, "systemd.journald.max_level_kmsg") && value) {
1580 r = log_level_from_string(value);
1581 if (r < 0)
1582 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1583 else
1584 s->max_level_kmsg = r;
1585 } else if (streq(key, "systemd.journald.max_level_wall") && value) {
1586 r = log_level_from_string(value);
1587 if (r < 0)
1588 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1589 else
1590 s->max_level_wall = r;
1591 } else if (startswith(key, "systemd.journald"))
1592 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
d025f1e4 1593
804ae586 1594 /* do not warn about state here, since probably systemd already did */
db91ea32 1595 return 0;
d025f1e4
ZJS
1596}
1597
1598static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1599 assert(s);
1600
43688c49 1601 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1602 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1603 "Journal\0",
1604 config_item_perf_lookup, journald_gperf_lookup,
1605 false, s);
d025f1e4
ZJS
1606}
1607
f9a810be
LP
1608static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1609 Server *s = userdata;
26687bf8
OS
1610
1611 assert(s);
1612
f9a810be 1613 server_sync(s);
26687bf8
OS
1614 return 0;
1615}
1616
d07f7b9e 1617int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1618 int r;
1619
26687bf8
OS
1620 assert(s);
1621
d07f7b9e
LP
1622 if (priority <= LOG_CRIT) {
1623 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1624 server_sync(s);
1625 return 0;
1626 }
1627
26687bf8
OS
1628 if (s->sync_scheduled)
1629 return 0;
1630
f9a810be
LP
1631 if (s->sync_interval_usec > 0) {
1632 usec_t when;
ca267016 1633
6a0f1f6d 1634 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1635 if (r < 0)
1636 return r;
26687bf8 1637
f9a810be
LP
1638 when += s->sync_interval_usec;
1639
1640 if (!s->sync_event_source) {
6a0f1f6d
LP
1641 r = sd_event_add_time(
1642 s->event,
1643 &s->sync_event_source,
1644 CLOCK_MONOTONIC,
1645 when, 0,
1646 server_dispatch_sync, s);
f9a810be
LP
1647 if (r < 0)
1648 return r;
1649
1650 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1651 } else {
1652 r = sd_event_source_set_time(s->sync_event_source, when);
1653 if (r < 0)
1654 return r;
1655
1656 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1657 }
26687bf8 1658 if (r < 0)
f9a810be 1659 return r;
26687bf8 1660
f9a810be
LP
1661 s->sync_scheduled = true;
1662 }
26687bf8
OS
1663
1664 return 0;
1665}
1666
0c24bb23
LP
1667static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1668 Server *s = userdata;
1669
1670 assert(s);
1671
1672 server_cache_hostname(s);
1673 return 0;
1674}
1675
1676static int server_open_hostname(Server *s) {
1677 int r;
1678
1679 assert(s);
1680
1681 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1682 if (s->hostname_fd < 0)
1683 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1684
151b9b96 1685 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1686 if (r < 0) {
28def94c
DR
1687 /* kernels prior to 3.2 don't support polling this file. Ignore
1688 * the failure. */
1689 if (r == -EPERM) {
e53fc357 1690 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1691 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1692 return 0;
1693 }
1694
23bbb0de 1695 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1696 }
1697
1698 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1699 if (r < 0)
1700 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1701
1702 return 0;
1703}
1704
e22aa3d3
LP
1705static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1706 Server *s = userdata;
1707 int r;
1708
1709 assert(s);
1710 assert(s->notify_event_source == es);
1711 assert(s->notify_fd == fd);
1712
e22aa3d3 1713 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1714 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1715 * READY=1 event or an stdout stream event. If there's nothing
1716 * to write anymore, turn our event source off. The next time
1717 * there's something to send it will be turned on again. */
e22aa3d3
LP
1718
1719 if (!s->sent_notify_ready) {
1720 static const char p[] =
1721 "READY=1\n"
1722 "STATUS=Processing requests...";
1723 ssize_t l;
1724
1725 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1726 if (l < 0) {
1727 if (errno == EAGAIN)
1728 return 0;
1729
1730 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1731 }
1732
1733 s->sent_notify_ready = true;
1734 log_debug("Sent READY=1 notification.");
1735
119e9655
LP
1736 } else if (s->send_watchdog) {
1737
1738 static const char p[] =
1739 "WATCHDOG=1";
1740
1741 ssize_t l;
1742
1743 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1744 if (l < 0) {
1745 if (errno == EAGAIN)
1746 return 0;
1747
1748 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1749 }
1750
1751 s->send_watchdog = false;
1752 log_debug("Sent WATCHDOG=1 notification.");
1753
e22aa3d3
LP
1754 } else if (s->stdout_streams_notify_queue)
1755 /* Dispatch one stream notification event */
1756 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1757
61233823 1758 /* Leave us enabled if there's still more to do. */
119e9655 1759 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1760 return 0;
1761
1762 /* There was nothing to do anymore, let's turn ourselves off. */
1763 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1764 if (r < 0)
1765 return log_error_errno(r, "Failed to turn off notify event source: %m");
1766
1767 return 0;
1768}
1769
119e9655
LP
1770static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1771 Server *s = userdata;
1772 int r;
1773
1774 assert(s);
1775
1776 s->send_watchdog = true;
1777
1778 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1779 if (r < 0)
1780 log_warning_errno(r, "Failed to turn on notify event source: %m");
1781
1782 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1783 if (r < 0)
1784 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1785
1786 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1787 if (r < 0)
1788 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1789
1790 return 0;
1791}
1792
e22aa3d3
LP
1793static int server_connect_notify(Server *s) {
1794 union sockaddr_union sa = {
1795 .un.sun_family = AF_UNIX,
1796 };
1797 const char *e;
1798 int r;
1799
1800 assert(s);
1801 assert(s->notify_fd < 0);
1802 assert(!s->notify_event_source);
1803
1804 /*
1805 So here's the problem: we'd like to send notification
1806 messages to PID 1, but we cannot do that via sd_notify(),
1807 since that's synchronous, and we might end up blocking on
1808 it. Specifically: given that PID 1 might block on
1809 dbus-daemon during IPC, and dbus-daemon is logging to us,
1810 and might hence block on us, we might end up in a deadlock
ccddd104 1811 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1812 generating a full blocking circle. To avoid this, let's
1813 create a non-blocking socket, and connect it to the
1814 notification socket, and then wait for POLLOUT before we
1815 send anything. This should efficiently avoid any deadlocks,
1816 as we'll never block on PID 1, hence PID 1 can safely block
1817 on dbus-daemon which can safely block on us again.
1818
1819 Don't think that this issue is real? It is, see:
1820 https://github.com/systemd/systemd/issues/1505
1821 */
1822
1823 e = getenv("NOTIFY_SOCKET");
1824 if (!e)
1825 return 0;
1826
1827 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1828 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1829 return -EINVAL;
1830 }
1831
1832 if (strlen(e) > sizeof(sa.un.sun_path)) {
1833 log_error("NOTIFY_SOCKET path too long: %s", e);
1834 return -EINVAL;
1835 }
1836
1837 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1838 if (s->notify_fd < 0)
1839 return log_error_errno(errno, "Failed to create notify socket: %m");
1840
1841 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1842
1843 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1844 if (sa.un.sun_path[0] == '@')
1845 sa.un.sun_path[0] = 0;
1846
fc2fffe7 1847 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1848 if (r < 0)
1849 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1850
1851 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1852 if (r < 0)
1853 return log_error_errno(r, "Failed to watch notification socket: %m");
1854
119e9655
LP
1855 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1856 s->send_watchdog = true;
1857
4de2402b 1858 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1859 if (r < 0)
1860 return log_error_errno(r, "Failed to add watchdog time event: %m");
1861 }
1862
e22aa3d3
LP
1863 /* This should fire pretty soon, which we'll use to send the
1864 * READY=1 event. */
1865
1866 return 0;
1867}
1868
d025f1e4 1869int server_init(Server *s) {
13790add 1870 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1871 int n, r, fd;
7d18d348 1872 bool no_sockets;
d025f1e4
ZJS
1873
1874 assert(s);
1875
1876 zero(*s);
e22aa3d3 1877 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1878 s->compress = true;
1879 s->seal = true;
1880
119e9655
LP
1881 s->watchdog_usec = USEC_INFINITY;
1882
26687bf8
OS
1883 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1884 s->sync_scheduled = false;
1885
d025f1e4
ZJS
1886 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1887 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1888
40b71e89 1889 s->forward_to_wall = true;
d025f1e4 1890
e150e820
MB
1891 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1892
d025f1e4
ZJS
1893 s->max_level_store = LOG_DEBUG;
1894 s->max_level_syslog = LOG_DEBUG;
1895 s->max_level_kmsg = LOG_NOTICE;
1896 s->max_level_console = LOG_INFO;
40b71e89 1897 s->max_level_wall = LOG_EMERG;
d025f1e4 1898
266a4700
FB
1899 journal_reset_metrics(&s->system_storage.metrics);
1900 journal_reset_metrics(&s->runtime_storage.metrics);
d025f1e4
ZJS
1901
1902 server_parse_config_file(s);
d7f69e16 1903 parse_proc_cmdline(parse_proc_cmdline_item, s, true);
8580d1f7 1904
d288f79f 1905 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1906 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1907 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1908 s->rate_limit_interval = s->rate_limit_burst = 0;
1909 }
d025f1e4 1910
8580d1f7 1911 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1912
43cf8388 1913 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1914 if (!s->user_journals)
1915 return log_oom();
1916
1917 s->mmap = mmap_cache_new();
1918 if (!s->mmap)
1919 return log_oom();
1920
b58c888f
VC
1921 s->deferred_closes = set_new(NULL);
1922 if (!s->deferred_closes)
1923 return log_oom();
1924
f9a810be 1925 r = sd_event_default(&s->event);
23bbb0de
MS
1926 if (r < 0)
1927 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1928
1929 n = sd_listen_fds(true);
23bbb0de
MS
1930 if (n < 0)
1931 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1932
1933 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1934
1935 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1936
1937 if (s->native_fd >= 0) {
1938 log_error("Too many native sockets passed.");
1939 return -EINVAL;
1940 }
1941
1942 s->native_fd = fd;
1943
1944 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1945
1946 if (s->stdout_fd >= 0) {
1947 log_error("Too many stdout sockets passed.");
1948 return -EINVAL;
1949 }
1950
1951 s->stdout_fd = fd;
1952
03ee5c38
LP
1953 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1954 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1955
1956 if (s->syslog_fd >= 0) {
1957 log_error("Too many /dev/log sockets passed.");
1958 return -EINVAL;
1959 }
1960
1961 s->syslog_fd = fd;
1962
875c2e22
LP
1963 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1964
1965 if (s->audit_fd >= 0) {
1966 log_error("Too many audit sockets passed.");
1967 return -EINVAL;
1968 }
1969
1970 s->audit_fd = fd;
1971
4ec3cd73 1972 } else {
4ec3cd73 1973
13790add
LP
1974 if (!fds) {
1975 fds = fdset_new();
1976 if (!fds)
1977 return log_oom();
1978 }
4ec3cd73 1979
13790add
LP
1980 r = fdset_put(fds, fd);
1981 if (r < 0)
1982 return log_oom();
4ec3cd73 1983 }
d025f1e4
ZJS
1984 }
1985
15d91bff
ZJS
1986 /* Try to restore streams, but don't bother if this fails */
1987 (void) server_restore_streams(s, fds);
d025f1e4 1988
13790add
LP
1989 if (fdset_size(fds) > 0) {
1990 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1991 fds = fdset_free(fds);
1992 }
1993
7d18d348
ZJS
1994 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1995
1996 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1997
1998 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1999 r = server_open_stdout_socket(s);
2000 if (r < 0)
2001 return r;
2002
37b7affe 2003 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 2004 r = server_open_syslog_socket(s);
d025f1e4
ZJS
2005 if (r < 0)
2006 return r;
2007
37b7affe 2008 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 2009 r = server_open_native_socket(s);
d025f1e4
ZJS
2010 if (r < 0)
2011 return r;
2012
37b7affe 2013 /* /dev/ksmg */
d025f1e4
ZJS
2014 r = server_open_dev_kmsg(s);
2015 if (r < 0)
2016 return r;
2017
7d18d348
ZJS
2018 /* Unless we got *some* sockets and not audit, open audit socket */
2019 if (s->audit_fd >= 0 || no_sockets) {
2020 r = server_open_audit(s);
2021 if (r < 0)
2022 return r;
2023 }
875c2e22 2024
d025f1e4
ZJS
2025 r = server_open_kernel_seqnum(s);
2026 if (r < 0)
2027 return r;
2028
0c24bb23
LP
2029 r = server_open_hostname(s);
2030 if (r < 0)
2031 return r;
2032
f9a810be 2033 r = setup_signals(s);
d025f1e4
ZJS
2034 if (r < 0)
2035 return r;
2036
2037 s->udev = udev_new();
2038 if (!s->udev)
2039 return -ENOMEM;
2040
f9a810be 2041 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
2042 if (!s->rate_limit)
2043 return -ENOMEM;
2044
e9174f29
LP
2045 r = cg_get_root_path(&s->cgroup_root);
2046 if (r < 0)
2047 return r;
2048
0c24bb23
LP
2049 server_cache_hostname(s);
2050 server_cache_boot_id(s);
2051 server_cache_machine_id(s);
2052
266a4700
FB
2053 s->runtime_storage.name = "Runtime journal";
2054 s->system_storage.name = "System journal";
2055
2056 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL);
2057 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL);
2058 if (!s->runtime_storage.path || !s->system_storage.path)
2059 return -ENOMEM;
2060
e22aa3d3
LP
2061 (void) server_connect_notify(s);
2062
804ae586 2063 return system_journal_open(s, false);
d025f1e4
ZJS
2064}
2065
2066void server_maybe_append_tags(Server *s) {
2067#ifdef HAVE_GCRYPT
2068 JournalFile *f;
2069 Iterator i;
2070 usec_t n;
2071
2072 n = now(CLOCK_REALTIME);
2073
2074 if (s->system_journal)
2075 journal_file_maybe_append_tag(s->system_journal, n);
2076
43cf8388 2077 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2078 journal_file_maybe_append_tag(f, n);
2079#endif
2080}
2081
2082void server_done(Server *s) {
2083 JournalFile *f;
2084 assert(s);
2085
b58c888f
VC
2086 if (s->deferred_closes) {
2087 journal_file_close_set(s->deferred_closes);
2088 set_free(s->deferred_closes);
2089 }
2090
d025f1e4
ZJS
2091 while (s->stdout_streams)
2092 stdout_stream_free(s->stdout_streams);
2093
2094 if (s->system_journal)
69a3a6fd 2095 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2096
2097 if (s->runtime_journal)
69a3a6fd 2098 (void) journal_file_close(s->runtime_journal);
d025f1e4 2099
43cf8388 2100 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2101 (void) journal_file_close(f);
d025f1e4 2102
43cf8388 2103 ordered_hashmap_free(s->user_journals);
d025f1e4 2104
f9a810be
LP
2105 sd_event_source_unref(s->syslog_event_source);
2106 sd_event_source_unref(s->native_event_source);
2107 sd_event_source_unref(s->stdout_event_source);
2108 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2109 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2110 sd_event_source_unref(s->sync_event_source);
2111 sd_event_source_unref(s->sigusr1_event_source);
2112 sd_event_source_unref(s->sigusr2_event_source);
2113 sd_event_source_unref(s->sigterm_event_source);
2114 sd_event_source_unref(s->sigint_event_source);
94b65516 2115 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2116 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2117 sd_event_source_unref(s->notify_event_source);
119e9655 2118 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2119 sd_event_unref(s->event);
d025f1e4 2120
03e334a1
LP
2121 safe_close(s->syslog_fd);
2122 safe_close(s->native_fd);
2123 safe_close(s->stdout_fd);
2124 safe_close(s->dev_kmsg_fd);
875c2e22 2125 safe_close(s->audit_fd);
03e334a1 2126 safe_close(s->hostname_fd);
e22aa3d3 2127 safe_close(s->notify_fd);
0c24bb23 2128
d025f1e4
ZJS
2129 if (s->rate_limit)
2130 journal_rate_limit_free(s->rate_limit);
2131
2132 if (s->kernel_seqnum)
2133 munmap(s->kernel_seqnum, sizeof(uint64_t));
2134
2135 free(s->buffer);
2136 free(s->tty_path);
e9174f29 2137 free(s->cgroup_root);
99d0966e 2138 free(s->hostname_field);
d025f1e4
ZJS
2139
2140 if (s->mmap)
2141 mmap_cache_unref(s->mmap);
2142
3e044c49 2143 udev_unref(s->udev);
d025f1e4 2144}
8580d1f7
LP
2145
2146static const char* const storage_table[_STORAGE_MAX] = {
2147 [STORAGE_AUTO] = "auto",
2148 [STORAGE_VOLATILE] = "volatile",
2149 [STORAGE_PERSISTENT] = "persistent",
2150 [STORAGE_NONE] = "none"
2151};
2152
2153DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2154DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2155
2156static const char* const split_mode_table[_SPLIT_MAX] = {
2157 [SPLIT_LOGIN] = "login",
2158 [SPLIT_UID] = "uid",
2159 [SPLIT_NONE] = "none",
2160};
2161
2162DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2163DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");