]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
udev: inform systemd how many workers we can potentially spawn (#4036)
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
afc5dbf3 47#include "io-util.h"
8580d1f7
LP
48#include "journal-authenticate.h"
49#include "journal-file.h"
d025f1e4
ZJS
50#include "journal-internal.h"
51#include "journal-vacuum.h"
8580d1f7 52#include "journald-audit.h"
d025f1e4 53#include "journald-kmsg.h"
d025f1e4 54#include "journald-native.h"
8580d1f7 55#include "journald-rate-limit.h"
3ffd4af2 56#include "journald-server.h"
8580d1f7
LP
57#include "journald-stream.h"
58#include "journald-syslog.h"
07630cea
LP
59#include "missing.h"
60#include "mkdir.h"
6bedfcbb 61#include "parse-util.h"
4e731273 62#include "proc-cmdline.h"
07630cea
LP
63#include "process-util.h"
64#include "rm-rf.h"
65#include "selinux-util.h"
66#include "signal-util.h"
67#include "socket-util.h"
32917e33 68#include "stdio-util.h"
8b43440b 69#include "string-table.h"
07630cea 70#include "string-util.h"
4a0b58c4 71#include "user-util.h"
8a03c9ef 72#include "log.h"
d025f1e4 73
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at the same time: find_journal() closes
 * already-open user journals until fewer than this many remain before opening another one. */
74#define USER_JOURNALS_MAX 1024
75
/* Defaults for sync interval, rate limiting and per-file time span. NOTE(review): their consumers are not
 * in this part of the file; meanings are inferred from the names — confirm against the server setup code. */
26687bf8 76#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
77#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 79#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 80
/* How long the disk space figures cached by determine_space_for() are reused before being recomputed
 * (verbose requests always recompute). */
8580d1f7 81#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 82
/* NOTE(review): presumably the send buffer size for the notification socket; not referenced in this part of
 * the file — confirm. */
e22aa3d3
LP
83#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
84
7a24f3bf
VC
85/* The period to insert between posting changes for coalescing */
86#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
87
8580d1f7
LP
88static int determine_space_for(
89 Server *s,
90 JournalMetrics *metrics,
91 const char *path,
92 const char *name,
93 bool verbose,
94 bool patch_min_use,
95 uint64_t *available,
96 uint64_t *limit) {
97
98 uint64_t sum = 0, ss_avail, avail;
7fd1b19b 99 _cleanup_closedir_ DIR *d = NULL;
8580d1f7
LP
100 struct dirent *de;
101 struct statvfs ss;
102 const char *p;
d025f1e4 103 usec_t ts;
d025f1e4 104
8580d1f7
LP
105 assert(s);
106 assert(metrics);
107 assert(path);
108 assert(name);
d025f1e4 109
8580d1f7 110 ts = now(CLOCK_MONOTONIC);
d025f1e4 111
8580d1f7 112 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 113
8580d1f7
LP
114 if (available)
115 *available = s->cached_space_available;
116 if (limit)
117 *limit = s->cached_space_limit;
d025f1e4 118
d025f1e4 119 return 0;
8580d1f7 120 }
d025f1e4 121
8580d1f7 122 p = strjoina(path, SERVER_MACHINE_ID(s));
d025f1e4 123 d = opendir(p);
d025f1e4 124 if (!d)
8580d1f7 125 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
d025f1e4
ZJS
126
127 if (fstatvfs(dirfd(d), &ss) < 0)
8580d1f7 128 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
d025f1e4 129
8580d1f7 130 FOREACH_DIRENT_ALL(de, d, break) {
d025f1e4 131 struct stat st;
d025f1e4
ZJS
132
133 if (!endswith(de->d_name, ".journal") &&
134 !endswith(de->d_name, ".journal~"))
135 continue;
136
8580d1f7
LP
137 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
138 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
d025f1e4 139 continue;
8580d1f7 140 }
d025f1e4
ZJS
141
142 if (!S_ISREG(st.st_mode))
143 continue;
144
145 sum += (uint64_t) st.st_blocks * 512UL;
146 }
147
8a03c9ef 148 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
d025f1e4 154
8580d1f7
LP
155 if (patch_min_use)
156 metrics->min_use = MAX(metrics->min_use, sum);
348ced90 157
8580d1f7
LP
158 ss_avail = ss.f_bsize * ss.f_bavail;
159 avail = LESS_BY(ss_avail, metrics->keep_free);
348ced90 160
8580d1f7
LP
161 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
162 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
163 s->cached_space_timestamp = ts;
d025f1e4 164
670b110c
ZJS
165 if (verbose) {
166 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
8580d1f7 167 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
282c5c4e
ZJS
168 format_bytes(fb1, sizeof(fb1), sum);
169 format_bytes(fb2, sizeof(fb2), metrics->max_use);
170 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
171 format_bytes(fb4, sizeof(fb4), ss_avail);
172 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
173 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
670b110c
ZJS
174
175 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
282c5c4e
ZJS
176 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
177 name, path, fb1, fb5, fb6),
178 "JOURNAL_NAME=%s", name,
179 "JOURNAL_PATH=%s", path,
180 "CURRENT_USE=%"PRIu64, sum,
181 "CURRENT_USE_PRETTY=%s", fb1,
182 "MAX_USE=%"PRIu64, metrics->max_use,
183 "MAX_USE_PRETTY=%s", fb2,
184 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
185 "DISK_KEEP_FREE_PRETTY=%s", fb3,
186 "DISK_AVAILABLE=%"PRIu64, ss_avail,
187 "DISK_AVAILABLE_PRETTY=%s", fb4,
188 "LIMIT=%"PRIu64, s->cached_space_limit,
189 "LIMIT_PRETTY=%s", fb5,
190 "AVAILABLE=%"PRIu64, s->cached_space_available,
191 "AVAILABLE_PRETTY=%s", fb6,
8a03c9ef 192 NULL);
8580d1f7
LP
193 }
194
195 if (available)
196 *available = s->cached_space_available;
197 if (limit)
198 *limit = s->cached_space_limit;
199
200 return 1;
201}
202
203static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
204 JournalMetrics *metrics;
205 const char *path, *name;
206
207 assert(s);
208
209 if (s->system_journal) {
210 path = "/var/log/journal/";
211 metrics = &s->system_metrics;
212 name = "System journal";
213 } else {
214 path = "/run/log/journal/";
215 metrics = &s->runtime_metrics;
216 name = "Runtime journal";
670b110c
ZJS
217 }
218
8580d1f7 219 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
d025f1e4
ZJS
220}
221
5c3bde3f 222static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 223#ifdef HAVE_ACL
5c3bde3f 224 int r;
d025f1e4 225#endif
d025f1e4
ZJS
226 assert(f);
227
d025f1e4 228#ifdef HAVE_ACL
34c10968 229 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
230 return;
231
5c3bde3f
ZJS
232 r = add_acls_for_user(f->fd, uid);
233 if (r < 0)
234 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
235#endif
236}
237
7a24f3bf
VC
238static int open_journal(
239 Server *s,
240 bool reliably,
241 const char *fname,
242 int flags,
243 bool seal,
244 JournalMetrics *metrics,
7a24f3bf
VC
245 JournalFile **ret) {
246 int r;
e167d7fd 247 JournalFile *f;
7a24f3bf
VC
248
249 assert(s);
250 assert(fname);
251 assert(ret);
252
253 if (reliably)
b58c888f 254 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 255 else
5d1ce257 256 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
257 if (r < 0)
258 return r;
259
e167d7fd 260 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 261 if (r < 0) {
69a3a6fd 262 (void) journal_file_close(f);
7a24f3bf
VC
263 return r;
264 }
265
e167d7fd 266 *ret = f;
7a24f3bf
VC
267 return r;
268}
269
105bdb46
VC
270static int system_journal_open(Server *s, bool flush_requested) {
271 const char *fn;
272 int r = 0;
273
274 if (!s->system_journal &&
275 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
276 (flush_requested
277 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
278
279 /* If in auto mode: first try to create the machine
280 * path, but not the prefix.
281 *
282 * If in persistent mode: create /var/log/journal and
283 * the machine path */
284
285 if (s->storage == STORAGE_PERSISTENT)
286 (void) mkdir_p("/var/log/journal/", 0755);
287
288 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
289 (void) mkdir(fn, 0755);
290
291 fn = strjoina(fn, "/system.journal");
292 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
293 if (r >= 0) {
294 server_add_acls(s->system_journal, 0);
295 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
296 } else if (r < 0) {
297 if (r != -ENOENT && r != -EROFS)
298 log_warning_errno(r, "Failed to open system journal: %m");
299
300 r = 0;
301 }
302 }
303
304 if (!s->runtime_journal &&
305 (s->storage != STORAGE_NONE)) {
306
307 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
308
309 if (s->system_journal) {
310
311 /* Try to open the runtime journal, but only
312 * if it already exists, so that we can flush
313 * it into the system journal */
314
315 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
316 if (r < 0) {
317 if (r != -ENOENT)
318 log_warning_errno(r, "Failed to open runtime journal: %m");
319
320 r = 0;
321 }
322
323 } else {
324
325 /* OK, we really need the runtime journal, so create
326 * it if necessary. */
327
328 (void) mkdir("/run/log", 0755);
329 (void) mkdir("/run/log/journal", 0755);
330 (void) mkdir_parents(fn, 0750);
331
332 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
333 if (r < 0)
334 return log_error_errno(r, "Failed to open runtime journal: %m");
335 }
336
337 if (s->runtime_journal) {
338 server_add_acls(s->runtime_journal, 0);
339 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
340 }
341 }
342
343 return r;
344}
345
d025f1e4 346static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 347 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
348 int r;
349 JournalFile *f;
350 sd_id128_t machine;
351
352 assert(s);
353
105bdb46
VC
354 /* A rotate that fails to create the new journal (ENOSPC) leaves the
355 * rotated journal as NULL. Unless we revisit opening, even after
356 * space is made available we'll continue to return NULL indefinitely.
357 *
358 * system_journal_open() is a noop if the journals are already open, so
359 * we can just call it here to recover from failed rotates (or anything
360 * else that's left the journals as NULL).
361 *
362 * Fixes https://github.com/systemd/systemd/issues/3968 */
363 (void) system_journal_open(s, false);
364
d025f1e4
ZJS
365 /* We split up user logs only on /var, not on /run. If the
366 * runtime file is open, we write to it exclusively, in order
367 * to guarantee proper order as soon as we flush /run to
368 * /var and close the runtime file. */
369
370 if (s->runtime_journal)
371 return s->runtime_journal;
372
61755fda 373 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
374 return s->system_journal;
375
376 r = sd_id128_get_machine(&machine);
377 if (r < 0)
378 return s->system_journal;
379
4a0b58c4 380 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
381 if (f)
382 return f;
383
de0671ee
ZJS
384 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
385 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
386 return s->system_journal;
387
43cf8388 388 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 389 /* Too many open? Then let's close one */
43cf8388 390 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 391 assert(f);
69a3a6fd 392 (void) journal_file_close(f);
d025f1e4
ZJS
393 }
394
089ed40b 395 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
d025f1e4
ZJS
396 if (r < 0)
397 return s->system_journal;
398
5c3bde3f 399 server_add_acls(f, uid);
d025f1e4 400
4a0b58c4 401 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 402 if (r < 0) {
69a3a6fd 403 (void) journal_file_close(f);
d025f1e4
ZJS
404 return s->system_journal;
405 }
406
407 return f;
408}
409
ea69bd41
LP
410static int do_rotate(
411 Server *s,
412 JournalFile **f,
413 const char* name,
414 bool seal,
415 uint32_t uid) {
416
fc55baee
ZJS
417 int r;
418 assert(s);
419
420 if (!*f)
421 return -EINVAL;
422
b58c888f 423 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
424 if (r < 0)
425 if (*f)
ea69bd41 426 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 427 else
ea69bd41 428 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 429 else
5c3bde3f 430 server_add_acls(*f, uid);
2678031a 431
fc55baee
ZJS
432 return r;
433}
434
d025f1e4
ZJS
435void server_rotate(Server *s) {
436 JournalFile *f;
437 void *k;
438 Iterator i;
439 int r;
440
441 log_debug("Rotating...");
442
8580d1f7
LP
443 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
444 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 445
43cf8388 446 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 447 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 448 if (r >= 0)
43cf8388 449 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
450 else if (!f)
451 /* Old file has been closed and deallocated */
43cf8388 452 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 453 }
b58c888f
VC
454
455 /* Perform any deferred closes which aren't still offlining. */
456 SET_FOREACH(f, s->deferred_closes, i)
457 if (!journal_file_is_offlining(f)) {
458 (void) set_remove(s->deferred_closes, f);
459 (void) journal_file_close(f);
460 }
d025f1e4
ZJS
461}
462
26687bf8
OS
463void server_sync(Server *s) {
464 JournalFile *f;
26687bf8
OS
465 Iterator i;
466 int r;
467
26687bf8 468 if (s->system_journal) {
ac2e41f5 469 r = journal_file_set_offline(s->system_journal, false);
26687bf8 470 if (r < 0)
65089b82 471 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
472 }
473
65c1d46b 474 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 475 r = journal_file_set_offline(f, false);
26687bf8 476 if (r < 0)
65089b82 477 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
478 }
479
f9a810be
LP
480 if (s->sync_event_source) {
481 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
482 if (r < 0)
da927ba9 483 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 484 }
26687bf8
OS
485
486 s->sync_scheduled = false;
487}
488
ea69bd41
LP
489static void do_vacuum(
490 Server *s,
ea69bd41 491 JournalFile *f,
8580d1f7
LP
492 JournalMetrics *metrics,
493 const char *path,
494 const char *name,
495 bool verbose,
496 bool patch_min_use) {
ea69bd41
LP
497
498 const char *p;
8580d1f7 499 uint64_t limit;
63c8666b
ZJS
500 int r;
501
8580d1f7
LP
502 assert(s);
503 assert(metrics);
504 assert(path);
505 assert(name);
506
63c8666b
ZJS
507 if (!f)
508 return;
509
8580d1f7
LP
510 p = strjoina(path, SERVER_MACHINE_ID(s));
511
512 limit = metrics->max_use;
513 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
514
515 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 516 if (r < 0 && r != -ENOENT)
8580d1f7 517 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
63c8666b
ZJS
518}
519
8580d1f7
LP
520int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
521 assert(s);
d025f1e4
ZJS
522
523 log_debug("Vacuuming...");
524
525 s->oldest_file_usec = 0;
526
8580d1f7
LP
527 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
528 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
d025f1e4 529
8580d1f7
LP
530 s->cached_space_limit = 0;
531 s->cached_space_available = 0;
532 s->cached_space_timestamp = 0;
d025f1e4 533
8580d1f7 534 return 0;
d025f1e4
ZJS
535}
536
0c24bb23
LP
537static void server_cache_machine_id(Server *s) {
538 sd_id128_t id;
539 int r;
540
541 assert(s);
542
543 r = sd_id128_get_machine(&id);
544 if (r < 0)
545 return;
546
547 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
548}
549
550static void server_cache_boot_id(Server *s) {
551 sd_id128_t id;
552 int r;
553
554 assert(s);
555
556 r = sd_id128_get_boot(&id);
557 if (r < 0)
558 return;
559
560 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
561}
562
563static void server_cache_hostname(Server *s) {
564 _cleanup_free_ char *t = NULL;
565 char *x;
566
567 assert(s);
568
569 t = gethostname_malloc();
570 if (!t)
571 return;
572
573 x = strappend("_HOSTNAME=", t);
574 if (!x)
575 return;
576
577 free(s->hostname_field);
578 s->hostname_field = x;
579}
580
8531ae70 581static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5
ZJS
582 switch(r) {
583 case -E2BIG: /* Hit configured limit */
584 case -EFBIG: /* Hit fs limit */
585 case -EDQUOT: /* Quota limit hit */
586 case -ENOSPC: /* Disk full */
d025f1e4 587 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5
ZJS
588 return true;
589 case -EIO: /* I/O error of some kind (mmap) */
590 log_warning("%s: IO error, rotating.", f->path);
591 return true;
592 case -EHOSTDOWN: /* Other machine */
d025f1e4 593 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5
ZJS
594 return true;
595 case -EBUSY: /* Unclean shutdown */
d025f1e4 596 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5
ZJS
597 return true;
598 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 599 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5
ZJS
600 return true;
601 case -EBADMSG: /* Corrupted */
602 case -ENODATA: /* Truncated */
603 case -ESHUTDOWN: /* Already archived */
d025f1e4 604 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5
ZJS
605 return true;
606 case -EIDRM: /* Journal file has been deleted */
2678031a 607 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5
ZJS
608 return true;
609 default:
d025f1e4 610 return false;
6e1045e5 611 }
d025f1e4
ZJS
612}
613
d07f7b9e 614static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
615 JournalFile *f;
616 bool vacuumed = false;
617 int r;
618
619 assert(s);
620 assert(iovec);
621 assert(n > 0);
622
623 f = find_journal(s, uid);
624 if (!f)
625 return;
626
627 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
628 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
629 server_rotate(s);
8580d1f7 630 server_vacuum(s, false, false);
d025f1e4
ZJS
631 vacuumed = true;
632
633 f = find_journal(s, uid);
634 if (!f)
635 return;
636 }
637
638 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 639 if (r >= 0) {
d07f7b9e 640 server_schedule_sync(s, priority);
d025f1e4 641 return;
26687bf8 642 }
d025f1e4
ZJS
643
644 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 645 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
646 return;
647 }
648
649 server_rotate(s);
8580d1f7 650 server_vacuum(s, false, false);
d025f1e4
ZJS
651
652 f = find_journal(s, uid);
653 if (!f)
654 return;
655
656 log_debug("Retrying write.");
657 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
658 if (r < 0)
659 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
660 else
d07f7b9e 661 server_schedule_sync(s, priority);
d025f1e4
ZJS
662}
663
664static void dispatch_message_real(
665 Server *s,
666 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
667 const struct ucred *ucred,
668 const struct timeval *tv,
d025f1e4 669 const char *label, size_t label_len,
968f3196 670 const char *unit_id,
d07f7b9e 671 int priority,
968f3196 672 pid_t object_pid) {
d025f1e4 673
968f3196 674 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
675 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
676 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
677 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 678 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
679 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
680 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
681 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
682 uid_t object_uid;
683 gid_t object_gid;
968f3196 684 char *x;
d025f1e4 685 int r;
ae018d9b 686 char *t, *c;
82499507
LP
687 uid_t realuid = 0, owner = 0, journal_uid;
688 bool owner_valid = false;
ae018d9b 689#ifdef HAVE_AUDIT
968f3196
ZJS
690 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
691 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
692 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
693 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
694
695 uint32_t audit;
696 uid_t loginuid;
697#endif
d025f1e4
ZJS
698
699 assert(s);
700 assert(iovec);
701 assert(n > 0);
968f3196 702 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
703
704 if (ucred) {
d025f1e4
ZJS
705 realuid = ucred->uid;
706
de0671ee 707 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 708 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 709
de0671ee 710 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 711 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 712
de0671ee 713 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 714 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
715
716 r = get_process_comm(ucred->pid, &t);
717 if (r >= 0) {
63c372cb 718 x = strjoina("_COMM=", t);
d025f1e4 719 free(t);
968f3196 720 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
721 }
722
723 r = get_process_exe(ucred->pid, &t);
724 if (r >= 0) {
63c372cb 725 x = strjoina("_EXE=", t);
d025f1e4 726 free(t);
968f3196 727 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
728 }
729
9bdbc2e2 730 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 731 if (r >= 0) {
63c372cb 732 x = strjoina("_CMDLINE=", t);
d025f1e4 733 free(t);
3a832116
SL
734 IOVEC_SET_STRING(iovec[n++], x);
735 }
736
737 r = get_process_capeff(ucred->pid, &t);
738 if (r >= 0) {
63c372cb 739 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 740 free(t);
968f3196 741 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
742 }
743
0a20e3c1 744#ifdef HAVE_AUDIT
d025f1e4 745 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 746 if (r >= 0) {
de0671ee 747 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
748 IOVEC_SET_STRING(iovec[n++], audit_session);
749 }
d025f1e4
ZJS
750
751 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 752 if (r >= 0) {
de0671ee 753 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 754 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 755 }
ae018d9b 756#endif
d025f1e4 757
e9174f29 758 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 759 if (r >= 0) {
968f3196
ZJS
760 char *session = NULL;
761
63c372cb 762 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 763 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 764
ae018d9b
LP
765 r = cg_path_get_session(c, &t);
766 if (r >= 0) {
63c372cb 767 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 768 free(t);
d025f1e4 769 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
770 }
771
772 if (cg_path_get_owner_uid(c, &owner) >= 0) {
773 owner_valid = true;
d025f1e4 774
de0671ee 775 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 776 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 777 }
d025f1e4 778
ae018d9b 779 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 780 x = strjoina("_SYSTEMD_UNIT=", t);
ae018d9b 781 free(t);
19cace37
LP
782 IOVEC_SET_STRING(iovec[n++], x);
783 } else if (unit_id && !session) {
63c372cb 784 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
785 IOVEC_SET_STRING(iovec[n++], x);
786 }
787
788 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 789 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 790 free(t);
968f3196 791 IOVEC_SET_STRING(iovec[n++], x);
19cace37 792 } else if (unit_id && session) {
63c372cb 793 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
794 IOVEC_SET_STRING(iovec[n++], x);
795 }
ae018d9b 796
0a244b8e 797 if (cg_path_get_slice(c, &t) >= 0) {
63c372cb 798 x = strjoina("_SYSTEMD_SLICE=", t);
0a244b8e
LP
799 free(t);
800 IOVEC_SET_STRING(iovec[n++], x);
801 }
802
ae018d9b 803 free(c);
2d43b190 804 } else if (unit_id) {
63c372cb 805 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 806 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 807 }
d025f1e4 808
d025f1e4 809#ifdef HAVE_SELINUX
6355e756 810 if (mac_selinux_have()) {
d682b3a7 811 if (label) {
f8294e41 812 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 813
d682b3a7
LP
814 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
815 IOVEC_SET_STRING(iovec[n++], x);
816 } else {
2ed96880 817 char *con;
d025f1e4 818
d682b3a7 819 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 820 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 821
d682b3a7
LP
822 freecon(con);
823 IOVEC_SET_STRING(iovec[n++], x);
824 }
d025f1e4
ZJS
825 }
826 }
827#endif
828 }
968f3196
ZJS
829 assert(n <= m);
830
831 if (object_pid) {
832 r = get_process_uid(object_pid, &object_uid);
833 if (r >= 0) {
de0671ee 834 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
835 IOVEC_SET_STRING(iovec[n++], o_uid);
836 }
837
838 r = get_process_gid(object_pid, &object_gid);
839 if (r >= 0) {
de0671ee 840 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
841 IOVEC_SET_STRING(iovec[n++], o_gid);
842 }
843
844 r = get_process_comm(object_pid, &t);
845 if (r >= 0) {
63c372cb 846 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
847 free(t);
848 IOVEC_SET_STRING(iovec[n++], x);
849 }
850
851 r = get_process_exe(object_pid, &t);
852 if (r >= 0) {
63c372cb 853 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
854 free(t);
855 IOVEC_SET_STRING(iovec[n++], x);
856 }
857
858 r = get_process_cmdline(object_pid, 0, false, &t);
859 if (r >= 0) {
63c372cb 860 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
861 free(t);
862 IOVEC_SET_STRING(iovec[n++], x);
863 }
864
865#ifdef HAVE_AUDIT
866 r = audit_session_from_pid(object_pid, &audit);
867 if (r >= 0) {
de0671ee 868 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
869 IOVEC_SET_STRING(iovec[n++], o_audit_session);
870 }
871
872 r = audit_loginuid_from_pid(object_pid, &loginuid);
873 if (r >= 0) {
de0671ee 874 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
875 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
876 }
877#endif
878
e9174f29 879 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 880 if (r >= 0) {
63c372cb 881 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
882 IOVEC_SET_STRING(iovec[n++], x);
883
884 r = cg_path_get_session(c, &t);
885 if (r >= 0) {
63c372cb 886 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
887 free(t);
888 IOVEC_SET_STRING(iovec[n++], x);
889 }
890
891 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 892 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
893 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
894 }
895
896 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 897 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 898 free(t);
19cace37
LP
899 IOVEC_SET_STRING(iovec[n++], x);
900 }
901
902 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 903 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 904 free(t);
968f3196 905 IOVEC_SET_STRING(iovec[n++], x);
19cace37 906 }
968f3196
ZJS
907
908 free(c);
909 }
910 }
911 assert(n <= m);
d025f1e4
ZJS
912
913 if (tv) {
ae018d9b 914 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 915 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
916 }
917
918 /* Note that strictly speaking storing the boot id here is
919 * redundant since the entry includes this in-line
920 * anyway. However, we need this indexed, too. */
0c24bb23
LP
921 if (!isempty(s->boot_id_field))
922 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 923
0c24bb23
LP
924 if (!isempty(s->machine_id_field))
925 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 926
0c24bb23
LP
927 if (!isempty(s->hostname_field))
928 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
929
930 assert(n <= m);
931
da499392 932 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 933 /* Split up strictly by any UID */
759c945a 934 journal_uid = realuid;
82499507 935 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
936 /* Split up by login UIDs. We do this only if the
937 * realuid is not root, in order not to accidentally
938 * leak privileged information to the user that is
939 * logged by a privileged process that is part of an
7517e174 940 * unprivileged session. */
8a0889df 941 journal_uid = owner;
da499392
KS
942 else
943 journal_uid = 0;
759c945a 944
d07f7b9e 945 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
946}
947
948void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
949 char mid[11 + 32 + 1];
8a03c9ef
ZJS
950 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
951 unsigned n = 0, m;
32917e33 952 int r;
d025f1e4 953 va_list ap;
b92bea5d 954 struct ucred ucred = {};
d025f1e4
ZJS
955
956 assert(s);
957 assert(format);
958
4850d39a 959 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
960 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
961 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
962
d025f1e4 963 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 964 assert_cc(6 == LOG_INFO);
32917e33 965 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 966
3bbaff3e 967 if (!sd_id128_is_null(message_id)) {
e2cc6eca 968 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
969 IOVEC_SET_STRING(iovec[n++], mid);
970 }
971
8a03c9ef
ZJS
972 m = n;
973
974 va_start(ap, format);
32917e33
ZJS
975 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
976 /* Error handling below */
8a03c9ef
ZJS
977 va_end(ap);
978
d025f1e4
ZJS
979 ucred.pid = getpid();
980 ucred.uid = getuid();
981 ucred.gid = getgid();
982
32917e33
ZJS
983 if (r >= 0)
984 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
985
986 while (m < n)
987 free(iovec[m++].iov_base);
32917e33
ZJS
988
989 if (r < 0) {
990 /* We failed to format the message. Emit a warning instead. */
991 char buf[LINE_MAX];
992
993 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
994
995 n = 3;
996 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
997 IOVEC_SET_STRING(iovec[n++], buf);
998 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
999 }
d025f1e4
ZJS
1000}
1001
1002void server_dispatch_message(
1003 Server *s,
1004 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1005 const struct ucred *ucred,
1006 const struct timeval *tv,
d025f1e4
ZJS
1007 const char *label, size_t label_len,
1008 const char *unit_id,
968f3196
ZJS
1009 int priority,
1010 pid_t object_pid) {
d025f1e4 1011
7027ff61 1012 int rl, r;
7fd1b19b 1013 _cleanup_free_ char *path = NULL;
8580d1f7 1014 uint64_t available = 0;
db91ea32 1015 char *c;
d025f1e4
ZJS
1016
1017 assert(s);
1018 assert(iovec || n == 0);
1019
1020 if (n == 0)
1021 return;
1022
1023 if (LOG_PRI(priority) > s->max_level_store)
1024 return;
1025
2f5df74a
HHPF
1026 /* Stop early in case the information will not be stored
1027 * in a journal. */
1028 if (s->storage == STORAGE_NONE)
1029 return;
1030
d025f1e4
ZJS
1031 if (!ucred)
1032 goto finish;
1033
e9174f29 1034 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1035 if (r < 0)
d025f1e4
ZJS
1036 goto finish;
1037
1038 /* example: /user/lennart/3/foobar
1039 * /system/dbus.service/foobar
1040 *
1041 * So let's cut of everything past the third /, since that is
1042 * where user directories start */
1043
1044 c = strchr(path, '/');
1045 if (c) {
1046 c = strchr(c+1, '/');
1047 if (c) {
1048 c = strchr(c+1, '/');
1049 if (c)
1050 *c = 0;
1051 }
1052 }
1053
8580d1f7
LP
1054 (void) determine_space(s, false, false, &available, NULL);
1055 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1056 if (rl == 0)
d025f1e4 1057 return;
d025f1e4
ZJS
1058
1059 /* Write a suppression message if we suppressed something */
1060 if (rl > 1)
db91ea32 1061 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1062 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1063 NULL);
d025f1e4
ZJS
1064
1065finish:
d07f7b9e 1066 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1067}
1068
d025f1e4 1069int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1070 sd_id128_t machine;
1071 sd_journal *j = NULL;
fbb63411
LP
1072 char ts[FORMAT_TIMESPAN_MAX];
1073 usec_t start;
1074 unsigned n = 0;
1075 int r;
d025f1e4
ZJS
1076
1077 assert(s);
1078
1079 if (s->storage != STORAGE_AUTO &&
1080 s->storage != STORAGE_PERSISTENT)
1081 return 0;
1082
1083 if (!s->runtime_journal)
1084 return 0;
1085
8580d1f7 1086 (void) system_journal_open(s, true);
d025f1e4
ZJS
1087
1088 if (!s->system_journal)
1089 return 0;
1090
1091 log_debug("Flushing to /var...");
1092
fbb63411
LP
1093 start = now(CLOCK_MONOTONIC);
1094
d025f1e4 1095 r = sd_id128_get_machine(&machine);
00a16861 1096 if (r < 0)
d025f1e4 1097 return r;
d025f1e4
ZJS
1098
1099 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1100 if (r < 0)
1101 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1102
93b73b06
LP
1103 sd_journal_set_data_threshold(j, 0);
1104
d025f1e4
ZJS
1105 SD_JOURNAL_FOREACH(j) {
1106 Object *o = NULL;
1107 JournalFile *f;
1108
1109 f = j->current_file;
1110 assert(f && f->current_offset > 0);
1111
fbb63411
LP
1112 n++;
1113
d025f1e4
ZJS
1114 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1115 if (r < 0) {
da927ba9 1116 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1117 goto finish;
1118 }
1119
1120 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1121 if (r >= 0)
1122 continue;
1123
1124 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1125 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1126 goto finish;
1127 }
1128
1129 server_rotate(s);
8580d1f7 1130 server_vacuum(s, false, false);
d025f1e4 1131
253f59df
LP
1132 if (!s->system_journal) {
1133 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1134 r = -EIO;
1135 goto finish;
1136 }
1137
d025f1e4
ZJS
1138 log_debug("Retrying write.");
1139 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1140 if (r < 0) {
da927ba9 1141 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1142 goto finish;
1143 }
1144 }
1145
804ae586
LP
1146 r = 0;
1147
d025f1e4
ZJS
1148finish:
1149 journal_file_post_change(s->system_journal);
1150
804ae586 1151 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1152
1153 if (r >= 0)
c6878637 1154 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1155
763c7aa2 1156 sd_journal_close(j);
d025f1e4 1157
8a03c9ef
ZJS
1158 server_driver_message(s, SD_ID128_NULL,
1159 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1160 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1161 n),
1162 NULL);
fbb63411 1163
d025f1e4
ZJS
1164 return r;
1165}
1166
8531ae70 1167int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1168 Server *s = userdata;
a315ac4e
LP
1169 struct ucred *ucred = NULL;
1170 struct timeval *tv = NULL;
1171 struct cmsghdr *cmsg;
1172 char *label = NULL;
1173 size_t label_len = 0, m;
1174 struct iovec iovec;
1175 ssize_t n;
1176 int *fds = NULL, v = 0;
1177 unsigned n_fds = 0;
1178
1179 union {
1180 struct cmsghdr cmsghdr;
1181
1182 /* We use NAME_MAX space for the SELinux label
1183 * here. The kernel currently enforces no
1184 * limit, but according to suggestions from
1185 * the SELinux people this will change and it
1186 * will probably be identical to NAME_MAX. For
1187 * now we use that, but this should be updated
1188 * one day when the final limit is known. */
1189 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1190 CMSG_SPACE(sizeof(struct timeval)) +
1191 CMSG_SPACE(sizeof(int)) + /* fd */
1192 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1193 } control = {};
1194
1195 union sockaddr_union sa = {};
1196
1197 struct msghdr msghdr = {
1198 .msg_iov = &iovec,
1199 .msg_iovlen = 1,
1200 .msg_control = &control,
1201 .msg_controllen = sizeof(control),
1202 .msg_name = &sa,
1203 .msg_namelen = sizeof(sa),
1204 };
f9a810be 1205
d025f1e4 1206 assert(s);
875c2e22 1207 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1208
1209 if (revents != EPOLLIN) {
1210 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1211 return -EIO;
1212 }
1213
a315ac4e
LP
1214 /* Try to get the right size, if we can. (Not all
1215 * sockets support SIOCINQ, hence we just try, but
1216 * don't rely on it. */
1217 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1218
a315ac4e
LP
1219 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1220 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1221 (size_t) LINE_MAX,
1222 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1223
a315ac4e
LP
1224 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1225 return log_oom();
875c2e22 1226
a315ac4e
LP
1227 iovec.iov_base = s->buffer;
1228 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1229
a315ac4e
LP
1230 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1231 if (n < 0) {
1232 if (errno == EINTR || errno == EAGAIN)
1233 return 0;
875c2e22 1234
a315ac4e
LP
1235 return log_error_errno(errno, "recvmsg() failed: %m");
1236 }
875c2e22 1237
a315ac4e
LP
1238 CMSG_FOREACH(cmsg, &msghdr) {
1239
1240 if (cmsg->cmsg_level == SOL_SOCKET &&
1241 cmsg->cmsg_type == SCM_CREDENTIALS &&
1242 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1243 ucred = (struct ucred*) CMSG_DATA(cmsg);
1244 else if (cmsg->cmsg_level == SOL_SOCKET &&
1245 cmsg->cmsg_type == SCM_SECURITY) {
1246 label = (char*) CMSG_DATA(cmsg);
1247 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1248 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1249 cmsg->cmsg_type == SO_TIMESTAMP &&
1250 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1251 tv = (struct timeval*) CMSG_DATA(cmsg);
1252 else if (cmsg->cmsg_level == SOL_SOCKET &&
1253 cmsg->cmsg_type == SCM_RIGHTS) {
1254 fds = (int*) CMSG_DATA(cmsg);
1255 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1256 }
a315ac4e 1257 }
d025f1e4 1258
a315ac4e
LP
1259 /* And a trailing NUL, just in case */
1260 s->buffer[n] = 0;
1261
1262 if (fd == s->syslog_fd) {
1263 if (n > 0 && n_fds == 0)
1264 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1265 else if (n_fds > 0)
1266 log_warning("Got file descriptors via syslog socket. Ignoring.");
1267
1268 } else if (fd == s->native_fd) {
1269 if (n > 0 && n_fds == 0)
1270 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1271 else if (n == 0 && n_fds == 1)
1272 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1273 else if (n_fds > 0)
1274 log_warning("Got too many file descriptors via native socket. Ignoring.");
1275
1276 } else {
1277 assert(fd == s->audit_fd);
1278
1279 if (n > 0 && n_fds == 0)
1280 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1281 else if (n_fds > 0)
1282 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1283 }
a315ac4e
LP
1284
1285 close_many(fds, n_fds);
1286 return 0;
f9a810be 1287}
d025f1e4 1288
f9a810be
LP
1289static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1290 Server *s = userdata;
33d52ab9 1291 int r;
d025f1e4 1292
f9a810be 1293 assert(s);
d025f1e4 1294
94b65516 1295 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1296
f9a810be
LP
1297 server_flush_to_var(s);
1298 server_sync(s);
8580d1f7 1299 server_vacuum(s, false, false);
d025f1e4 1300
33d52ab9
LP
1301 r = touch("/run/systemd/journal/flushed");
1302 if (r < 0)
1303 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1304
f9a810be
LP
1305 return 0;
1306}
d025f1e4 1307
f9a810be
LP
1308static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1309 Server *s = userdata;
33d52ab9 1310 int r;
d025f1e4 1311
f9a810be 1312 assert(s);
d025f1e4 1313
94b65516 1314 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1315 server_rotate(s);
8580d1f7 1316 server_vacuum(s, true, true);
d025f1e4 1317
dbd6e31c 1318 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1319 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1320 if (r < 0)
1321 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1322
f9a810be
LP
1323 return 0;
1324}
d025f1e4 1325
f9a810be
LP
1326static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1327 Server *s = userdata;
d025f1e4 1328
f9a810be 1329 assert(s);
d025f1e4 1330
4daf54a8 1331 log_received_signal(LOG_INFO, si);
d025f1e4 1332
6203e07a 1333 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1334 return 0;
1335}
1336
94b65516
LP
1337static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1338 Server *s = userdata;
33d52ab9 1339 int r;
94b65516
LP
1340
1341 assert(s);
1342
1343 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1344
1345 server_sync(s);
1346
1347 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1348 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1349 if (r < 0)
1350 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1351
1352 return 0;
1353}
1354
f9a810be 1355static int setup_signals(Server *s) {
f9a810be 1356 int r;
d025f1e4
ZJS
1357
1358 assert(s);
1359
94b65516 1360 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1361
151b9b96 1362 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1363 if (r < 0)
1364 return r;
1365
151b9b96 1366 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1367 if (r < 0)
1368 return r;
d025f1e4 1369
151b9b96 1370 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1371 if (r < 0)
1372 return r;
d025f1e4 1373
b374689c
LP
1374 /* Let's process SIGTERM late, so that we flush all queued
1375 * messages to disk before we exit */
1376 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1377 if (r < 0)
1378 return r;
1379
1380 /* When journald is invoked on the terminal (when debugging),
1381 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1382 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1383 if (r < 0)
1384 return r;
d025f1e4 1385
b374689c
LP
1386 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1387 if (r < 0)
1388 return r;
1389
94b65516
LP
1390 /* SIGRTMIN+1 causes an immediate sync. We process this very
1391 * late, so that everything else queued at this point is
1392 * really written to disk. Clients can watch
1393 * /run/systemd/journal/synced with inotify until its mtime
1394 * changes to see when a sync happened. */
1395 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1396 if (r < 0)
1397 return r;
1398
1399 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1400 if (r < 0)
1401 return r;
1402
d025f1e4
ZJS
1403 return 0;
1404}
1405
1406static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1407 _cleanup_free_ char *line = NULL;
d581d9d9 1408 const char *p;
74df0fca 1409 int r;
d025f1e4 1410
74df0fca 1411 r = proc_cmdline(&line);
b5884878 1412 if (r < 0) {
da927ba9 1413 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1414 return 0;
b5884878 1415 }
d025f1e4 1416
d581d9d9 1417 p = line;
9ed794a3 1418 for (;;) {
ff82c36c 1419 _cleanup_free_ char *word = NULL;
d025f1e4 1420
d581d9d9
SS
1421 r = extract_first_word(&p, &word, NULL, 0);
1422 if (r < 0)
1423 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1424
1425 if (r == 0)
1426 break;
d025f1e4
ZJS
1427
1428 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1429 r = parse_boolean(word + 35);
1430 if (r < 0)
1431 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1432 else
1433 s->forward_to_syslog = r;
1434 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1435 r = parse_boolean(word + 33);
1436 if (r < 0)
1437 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1438 else
1439 s->forward_to_kmsg = r;
1440 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1441 r = parse_boolean(word + 36);
1442 if (r < 0)
1443 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1444 else
1445 s->forward_to_console = r;
40b71e89
ST
1446 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1447 r = parse_boolean(word + 33);
1448 if (r < 0)
1449 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1450 else
1451 s->forward_to_wall = r;
d025f1e4
ZJS
1452 } else if (startswith(word, "systemd.journald"))
1453 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1454 }
1455
804ae586 1456 /* do not warn about state here, since probably systemd already did */
db91ea32 1457 return 0;
d025f1e4
ZJS
1458}
1459
1460static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1461 assert(s);
1462
75eb6154
LP
1463 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1464 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1465 "Journal\0",
1466 config_item_perf_lookup, journald_gperf_lookup,
1467 false, s);
d025f1e4
ZJS
1468}
1469
f9a810be
LP
1470static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1471 Server *s = userdata;
26687bf8
OS
1472
1473 assert(s);
1474
f9a810be 1475 server_sync(s);
26687bf8
OS
1476 return 0;
1477}
1478
d07f7b9e 1479int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1480 int r;
1481
26687bf8
OS
1482 assert(s);
1483
d07f7b9e
LP
1484 if (priority <= LOG_CRIT) {
1485 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1486 server_sync(s);
1487 return 0;
1488 }
1489
26687bf8
OS
1490 if (s->sync_scheduled)
1491 return 0;
1492
f9a810be
LP
1493 if (s->sync_interval_usec > 0) {
1494 usec_t when;
ca267016 1495
6a0f1f6d 1496 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1497 if (r < 0)
1498 return r;
26687bf8 1499
f9a810be
LP
1500 when += s->sync_interval_usec;
1501
1502 if (!s->sync_event_source) {
6a0f1f6d
LP
1503 r = sd_event_add_time(
1504 s->event,
1505 &s->sync_event_source,
1506 CLOCK_MONOTONIC,
1507 when, 0,
1508 server_dispatch_sync, s);
f9a810be
LP
1509 if (r < 0)
1510 return r;
1511
1512 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1513 } else {
1514 r = sd_event_source_set_time(s->sync_event_source, when);
1515 if (r < 0)
1516 return r;
1517
1518 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1519 }
26687bf8 1520 if (r < 0)
f9a810be 1521 return r;
26687bf8 1522
f9a810be
LP
1523 s->sync_scheduled = true;
1524 }
26687bf8
OS
1525
1526 return 0;
1527}
1528
0c24bb23
LP
1529static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1530 Server *s = userdata;
1531
1532 assert(s);
1533
1534 server_cache_hostname(s);
1535 return 0;
1536}
1537
1538static int server_open_hostname(Server *s) {
1539 int r;
1540
1541 assert(s);
1542
1543 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1544 if (s->hostname_fd < 0)
1545 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1546
151b9b96 1547 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1548 if (r < 0) {
28def94c
DR
1549 /* kernels prior to 3.2 don't support polling this file. Ignore
1550 * the failure. */
1551 if (r == -EPERM) {
e53fc357 1552 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1553 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1554 return 0;
1555 }
1556
23bbb0de 1557 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1558 }
1559
1560 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1561 if (r < 0)
1562 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1563
1564 return 0;
1565}
1566
e22aa3d3
LP
1567static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1568 Server *s = userdata;
1569 int r;
1570
1571 assert(s);
1572 assert(s->notify_event_source == es);
1573 assert(s->notify_fd == fd);
1574
e22aa3d3 1575 /* The $NOTIFY_SOCKET is writable again, now send exactly one
119e9655
LP
1576 * message on it. Either it's the wtachdog event, the initial
1577 * READY=1 event or an stdout stream event. If there's nothing
1578 * to write anymore, turn our event source off. The next time
1579 * there's something to send it will be turned on again. */
e22aa3d3
LP
1580
1581 if (!s->sent_notify_ready) {
1582 static const char p[] =
1583 "READY=1\n"
1584 "STATUS=Processing requests...";
1585 ssize_t l;
1586
1587 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1588 if (l < 0) {
1589 if (errno == EAGAIN)
1590 return 0;
1591
1592 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1593 }
1594
1595 s->sent_notify_ready = true;
1596 log_debug("Sent READY=1 notification.");
1597
119e9655
LP
1598 } else if (s->send_watchdog) {
1599
1600 static const char p[] =
1601 "WATCHDOG=1";
1602
1603 ssize_t l;
1604
1605 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1606 if (l < 0) {
1607 if (errno == EAGAIN)
1608 return 0;
1609
1610 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1611 }
1612
1613 s->send_watchdog = false;
1614 log_debug("Sent WATCHDOG=1 notification.");
1615
e22aa3d3
LP
1616 } else if (s->stdout_streams_notify_queue)
1617 /* Dispatch one stream notification event */
1618 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1619
61233823 1620 /* Leave us enabled if there's still more to do. */
119e9655 1621 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1622 return 0;
1623
1624 /* There was nothing to do anymore, let's turn ourselves off. */
1625 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1626 if (r < 0)
1627 return log_error_errno(r, "Failed to turn off notify event source: %m");
1628
1629 return 0;
1630}
1631
119e9655
LP
1632static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1633 Server *s = userdata;
1634 int r;
1635
1636 assert(s);
1637
1638 s->send_watchdog = true;
1639
1640 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1641 if (r < 0)
1642 log_warning_errno(r, "Failed to turn on notify event source: %m");
1643
1644 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1645 if (r < 0)
1646 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1647
1648 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1649 if (r < 0)
1650 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1651
1652 return 0;
1653}
1654
e22aa3d3
LP
1655static int server_connect_notify(Server *s) {
1656 union sockaddr_union sa = {
1657 .un.sun_family = AF_UNIX,
1658 };
1659 const char *e;
1660 int r;
1661
1662 assert(s);
1663 assert(s->notify_fd < 0);
1664 assert(!s->notify_event_source);
1665
1666 /*
1667 So here's the problem: we'd like to send notification
1668 messages to PID 1, but we cannot do that via sd_notify(),
1669 since that's synchronous, and we might end up blocking on
1670 it. Specifically: given that PID 1 might block on
1671 dbus-daemon during IPC, and dbus-daemon is logging to us,
1672 and might hence block on us, we might end up in a deadlock
ccddd104 1673 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1674 generating a full blocking circle. To avoid this, let's
1675 create a non-blocking socket, and connect it to the
1676 notification socket, and then wait for POLLOUT before we
1677 send anything. This should efficiently avoid any deadlocks,
1678 as we'll never block on PID 1, hence PID 1 can safely block
1679 on dbus-daemon which can safely block on us again.
1680
1681 Don't think that this issue is real? It is, see:
1682 https://github.com/systemd/systemd/issues/1505
1683 */
1684
1685 e = getenv("NOTIFY_SOCKET");
1686 if (!e)
1687 return 0;
1688
1689 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1690 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1691 return -EINVAL;
1692 }
1693
1694 if (strlen(e) > sizeof(sa.un.sun_path)) {
1695 log_error("NOTIFY_SOCKET path too long: %s", e);
1696 return -EINVAL;
1697 }
1698
1699 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1700 if (s->notify_fd < 0)
1701 return log_error_errno(errno, "Failed to create notify socket: %m");
1702
1703 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1704
1705 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1706 if (sa.un.sun_path[0] == '@')
1707 sa.un.sun_path[0] = 0;
1708
fc2fffe7 1709 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1710 if (r < 0)
1711 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1712
1713 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1714 if (r < 0)
1715 return log_error_errno(r, "Failed to watch notification socket: %m");
1716
119e9655
LP
1717 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1718 s->send_watchdog = true;
1719
4de2402b 1720 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1721 if (r < 0)
1722 return log_error_errno(r, "Failed to add watchdog time event: %m");
1723 }
1724
e22aa3d3
LP
1725 /* This should fire pretty soon, which we'll use to send the
1726 * READY=1 event. */
1727
1728 return 0;
1729}
1730
d025f1e4 1731int server_init(Server *s) {
13790add 1732 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1733 int n, r, fd;
7d18d348 1734 bool no_sockets;
d025f1e4
ZJS
1735
1736 assert(s);
1737
1738 zero(*s);
e22aa3d3 1739 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1740 s->compress = true;
1741 s->seal = true;
1742
119e9655
LP
1743 s->watchdog_usec = USEC_INFINITY;
1744
26687bf8
OS
1745 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1746 s->sync_scheduled = false;
1747
d025f1e4
ZJS
1748 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1749 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1750
40b71e89 1751 s->forward_to_wall = true;
d025f1e4 1752
e150e820
MB
1753 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1754
d025f1e4
ZJS
1755 s->max_level_store = LOG_DEBUG;
1756 s->max_level_syslog = LOG_DEBUG;
1757 s->max_level_kmsg = LOG_NOTICE;
1758 s->max_level_console = LOG_INFO;
40b71e89 1759 s->max_level_wall = LOG_EMERG;
d025f1e4 1760
8580d1f7
LP
1761 journal_reset_metrics(&s->system_metrics);
1762 journal_reset_metrics(&s->runtime_metrics);
d025f1e4
ZJS
1763
1764 server_parse_config_file(s);
1765 server_parse_proc_cmdline(s);
8580d1f7 1766
d288f79f 1767 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1768 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1769 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1770 s->rate_limit_interval = s->rate_limit_burst = 0;
1771 }
d025f1e4 1772
8580d1f7 1773 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1774
43cf8388 1775 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1776 if (!s->user_journals)
1777 return log_oom();
1778
1779 s->mmap = mmap_cache_new();
1780 if (!s->mmap)
1781 return log_oom();
1782
b58c888f
VC
1783 s->deferred_closes = set_new(NULL);
1784 if (!s->deferred_closes)
1785 return log_oom();
1786
f9a810be 1787 r = sd_event_default(&s->event);
23bbb0de
MS
1788 if (r < 0)
1789 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1790
1791 n = sd_listen_fds(true);
23bbb0de
MS
1792 if (n < 0)
1793 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1794
1795 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1796
1797 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1798
1799 if (s->native_fd >= 0) {
1800 log_error("Too many native sockets passed.");
1801 return -EINVAL;
1802 }
1803
1804 s->native_fd = fd;
1805
1806 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1807
1808 if (s->stdout_fd >= 0) {
1809 log_error("Too many stdout sockets passed.");
1810 return -EINVAL;
1811 }
1812
1813 s->stdout_fd = fd;
1814
03ee5c38
LP
1815 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1816 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1817
1818 if (s->syslog_fd >= 0) {
1819 log_error("Too many /dev/log sockets passed.");
1820 return -EINVAL;
1821 }
1822
1823 s->syslog_fd = fd;
1824
875c2e22
LP
1825 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1826
1827 if (s->audit_fd >= 0) {
1828 log_error("Too many audit sockets passed.");
1829 return -EINVAL;
1830 }
1831
1832 s->audit_fd = fd;
1833
4ec3cd73 1834 } else {
4ec3cd73 1835
13790add
LP
1836 if (!fds) {
1837 fds = fdset_new();
1838 if (!fds)
1839 return log_oom();
1840 }
4ec3cd73 1841
13790add
LP
1842 r = fdset_put(fds, fd);
1843 if (r < 0)
1844 return log_oom();
4ec3cd73 1845 }
d025f1e4
ZJS
1846 }
1847
15d91bff
ZJS
1848 /* Try to restore streams, but don't bother if this fails */
1849 (void) server_restore_streams(s, fds);
d025f1e4 1850
13790add
LP
1851 if (fdset_size(fds) > 0) {
1852 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1853 fds = fdset_free(fds);
1854 }
1855
7d18d348
ZJS
1856 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1857
1858 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1859
1860 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1861 r = server_open_stdout_socket(s);
1862 if (r < 0)
1863 return r;
1864
37b7affe 1865 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1866 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1867 if (r < 0)
1868 return r;
1869
37b7affe 1870 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1871 r = server_open_native_socket(s);
d025f1e4
ZJS
1872 if (r < 0)
1873 return r;
1874
37b7affe 1875 /* /dev/ksmg */
d025f1e4
ZJS
1876 r = server_open_dev_kmsg(s);
1877 if (r < 0)
1878 return r;
1879
7d18d348
ZJS
1880 /* Unless we got *some* sockets and not audit, open audit socket */
1881 if (s->audit_fd >= 0 || no_sockets) {
1882 r = server_open_audit(s);
1883 if (r < 0)
1884 return r;
1885 }
875c2e22 1886
d025f1e4
ZJS
1887 r = server_open_kernel_seqnum(s);
1888 if (r < 0)
1889 return r;
1890
0c24bb23
LP
1891 r = server_open_hostname(s);
1892 if (r < 0)
1893 return r;
1894
f9a810be 1895 r = setup_signals(s);
d025f1e4
ZJS
1896 if (r < 0)
1897 return r;
1898
1899 s->udev = udev_new();
1900 if (!s->udev)
1901 return -ENOMEM;
1902
f9a810be 1903 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1904 if (!s->rate_limit)
1905 return -ENOMEM;
1906
e9174f29
LP
1907 r = cg_get_root_path(&s->cgroup_root);
1908 if (r < 0)
1909 return r;
1910
0c24bb23
LP
1911 server_cache_hostname(s);
1912 server_cache_boot_id(s);
1913 server_cache_machine_id(s);
1914
e22aa3d3
LP
1915 (void) server_connect_notify(s);
1916
804ae586 1917 return system_journal_open(s, false);
d025f1e4
ZJS
1918}
1919
1920void server_maybe_append_tags(Server *s) {
1921#ifdef HAVE_GCRYPT
1922 JournalFile *f;
1923 Iterator i;
1924 usec_t n;
1925
1926 n = now(CLOCK_REALTIME);
1927
1928 if (s->system_journal)
1929 journal_file_maybe_append_tag(s->system_journal, n);
1930
43cf8388 1931 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1932 journal_file_maybe_append_tag(f, n);
1933#endif
1934}
1935
1936void server_done(Server *s) {
1937 JournalFile *f;
1938 assert(s);
1939
b58c888f
VC
1940 if (s->deferred_closes) {
1941 journal_file_close_set(s->deferred_closes);
1942 set_free(s->deferred_closes);
1943 }
1944
d025f1e4
ZJS
1945 while (s->stdout_streams)
1946 stdout_stream_free(s->stdout_streams);
1947
1948 if (s->system_journal)
69a3a6fd 1949 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1950
1951 if (s->runtime_journal)
69a3a6fd 1952 (void) journal_file_close(s->runtime_journal);
d025f1e4 1953
43cf8388 1954 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1955 (void) journal_file_close(f);
d025f1e4 1956
43cf8388 1957 ordered_hashmap_free(s->user_journals);
d025f1e4 1958
f9a810be
LP
1959 sd_event_source_unref(s->syslog_event_source);
1960 sd_event_source_unref(s->native_event_source);
1961 sd_event_source_unref(s->stdout_event_source);
1962 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1963 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1964 sd_event_source_unref(s->sync_event_source);
1965 sd_event_source_unref(s->sigusr1_event_source);
1966 sd_event_source_unref(s->sigusr2_event_source);
1967 sd_event_source_unref(s->sigterm_event_source);
1968 sd_event_source_unref(s->sigint_event_source);
94b65516 1969 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1970 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1971 sd_event_source_unref(s->notify_event_source);
119e9655 1972 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1973 sd_event_unref(s->event);
d025f1e4 1974
03e334a1
LP
1975 safe_close(s->syslog_fd);
1976 safe_close(s->native_fd);
1977 safe_close(s->stdout_fd);
1978 safe_close(s->dev_kmsg_fd);
875c2e22 1979 safe_close(s->audit_fd);
03e334a1 1980 safe_close(s->hostname_fd);
e22aa3d3 1981 safe_close(s->notify_fd);
0c24bb23 1982
d025f1e4
ZJS
1983 if (s->rate_limit)
1984 journal_rate_limit_free(s->rate_limit);
1985
1986 if (s->kernel_seqnum)
1987 munmap(s->kernel_seqnum, sizeof(uint64_t));
1988
1989 free(s->buffer);
1990 free(s->tty_path);
e9174f29 1991 free(s->cgroup_root);
99d0966e 1992 free(s->hostname_field);
d025f1e4
ZJS
1993
1994 if (s->mmap)
1995 mmap_cache_unref(s->mmap);
1996
3e044c49 1997 udev_unref(s->udev);
d025f1e4 1998}
8580d1f7
LP
1999
2000static const char* const storage_table[_STORAGE_MAX] = {
2001 [STORAGE_AUTO] = "auto",
2002 [STORAGE_VOLATILE] = "volatile",
2003 [STORAGE_PERSISTENT] = "persistent",
2004 [STORAGE_NONE] = "none"
2005};
2006
2007DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2008DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2009
2010static const char* const split_mode_table[_SPLIT_MAX] = {
2011 [SPLIT_LOGIN] = "login",
2012 [SPLIT_UID] = "uid",
2013 [SPLIT_NONE] = "none",
2014};
2015
2016DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2017DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");