]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
core: add "invocation ID" concept to service manager
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
d025f1e4 74
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at the same time;
 * find_journal() closes entries of the user journal hashmap once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults (NOTE(review): presumably overridable through configuration parsed elsewhere). */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long determine_space_for() keeps serving its cached figures before recalculating */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88
8580d1f7
LP
89static int determine_space_for(
90 Server *s,
91 JournalMetrics *metrics,
92 const char *path,
93 const char *name,
94 bool verbose,
95 bool patch_min_use,
96 uint64_t *available,
97 uint64_t *limit) {
98
99 uint64_t sum = 0, ss_avail, avail;
7fd1b19b 100 _cleanup_closedir_ DIR *d = NULL;
8580d1f7
LP
101 struct dirent *de;
102 struct statvfs ss;
103 const char *p;
d025f1e4 104 usec_t ts;
d025f1e4 105
8580d1f7
LP
106 assert(s);
107 assert(metrics);
108 assert(path);
109 assert(name);
d025f1e4 110
8580d1f7 111 ts = now(CLOCK_MONOTONIC);
d025f1e4 112
8580d1f7 113 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 114
8580d1f7
LP
115 if (available)
116 *available = s->cached_space_available;
117 if (limit)
118 *limit = s->cached_space_limit;
d025f1e4 119
d025f1e4 120 return 0;
8580d1f7 121 }
d025f1e4 122
8580d1f7 123 p = strjoina(path, SERVER_MACHINE_ID(s));
d025f1e4 124 d = opendir(p);
d025f1e4 125 if (!d)
8580d1f7 126 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
d025f1e4
ZJS
127
128 if (fstatvfs(dirfd(d), &ss) < 0)
8580d1f7 129 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
d025f1e4 130
8580d1f7 131 FOREACH_DIRENT_ALL(de, d, break) {
d025f1e4 132 struct stat st;
d025f1e4
ZJS
133
134 if (!endswith(de->d_name, ".journal") &&
135 !endswith(de->d_name, ".journal~"))
136 continue;
137
8580d1f7
LP
138 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
139 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
d025f1e4 140 continue;
8580d1f7 141 }
d025f1e4
ZJS
142
143 if (!S_ISREG(st.st_mode))
144 continue;
145
146 sum += (uint64_t) st.st_blocks * 512UL;
147 }
148
8a03c9ef 149 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
150 * current usage on disk. We do this when starting up and
151 * first opening the journal files. This way sudden spikes in
152 * disk usage will not cause journald to vacuum files without
153 * bounds. Note that this means that only a restart of
154 * journald will make it reset this value. */
d025f1e4 155
8580d1f7
LP
156 if (patch_min_use)
157 metrics->min_use = MAX(metrics->min_use, sum);
348ced90 158
8580d1f7
LP
159 ss_avail = ss.f_bsize * ss.f_bavail;
160 avail = LESS_BY(ss_avail, metrics->keep_free);
348ced90 161
8580d1f7
LP
162 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
163 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
164 s->cached_space_timestamp = ts;
d025f1e4 165
670b110c
ZJS
166 if (verbose) {
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
8580d1f7 168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
282c5c4e
ZJS
169 format_bytes(fb1, sizeof(fb1), sum);
170 format_bytes(fb2, sizeof(fb2), metrics->max_use);
171 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
172 format_bytes(fb4, sizeof(fb4), ss_avail);
173 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
174 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
670b110c
ZJS
175
176 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
282c5c4e
ZJS
177 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
178 name, path, fb1, fb5, fb6),
179 "JOURNAL_NAME=%s", name,
180 "JOURNAL_PATH=%s", path,
181 "CURRENT_USE=%"PRIu64, sum,
182 "CURRENT_USE_PRETTY=%s", fb1,
183 "MAX_USE=%"PRIu64, metrics->max_use,
184 "MAX_USE_PRETTY=%s", fb2,
185 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
186 "DISK_KEEP_FREE_PRETTY=%s", fb3,
187 "DISK_AVAILABLE=%"PRIu64, ss_avail,
188 "DISK_AVAILABLE_PRETTY=%s", fb4,
189 "LIMIT=%"PRIu64, s->cached_space_limit,
190 "LIMIT_PRETTY=%s", fb5,
191 "AVAILABLE=%"PRIu64, s->cached_space_available,
192 "AVAILABLE_PRETTY=%s", fb6,
8a03c9ef 193 NULL);
8580d1f7
LP
194 }
195
196 if (available)
197 *available = s->cached_space_available;
198 if (limit)
199 *limit = s->cached_space_limit;
200
201 return 1;
202}
203
204static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
205 JournalMetrics *metrics;
206 const char *path, *name;
207
208 assert(s);
209
210 if (s->system_journal) {
211 path = "/var/log/journal/";
212 metrics = &s->system_metrics;
213 name = "System journal";
214 } else {
215 path = "/run/log/journal/";
216 metrics = &s->runtime_metrics;
217 name = "Runtime journal";
670b110c
ZJS
218 }
219
8580d1f7 220 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
d025f1e4
ZJS
221}
222
5c3bde3f 223static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 224#ifdef HAVE_ACL
5c3bde3f 225 int r;
d025f1e4 226#endif
d025f1e4
ZJS
227 assert(f);
228
d025f1e4 229#ifdef HAVE_ACL
34c10968 230 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
231 return;
232
5c3bde3f
ZJS
233 r = add_acls_for_user(f->fd, uid);
234 if (r < 0)
235 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
236#endif
237}
238
7a24f3bf
VC
239static int open_journal(
240 Server *s,
241 bool reliably,
242 const char *fname,
243 int flags,
244 bool seal,
245 JournalMetrics *metrics,
7a24f3bf
VC
246 JournalFile **ret) {
247 int r;
e167d7fd 248 JournalFile *f;
7a24f3bf
VC
249
250 assert(s);
251 assert(fname);
252 assert(ret);
253
254 if (reliably)
b58c888f 255 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 256 else
5d1ce257 257 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
258 if (r < 0)
259 return r;
260
e167d7fd 261 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 262 if (r < 0) {
69a3a6fd 263 (void) journal_file_close(f);
7a24f3bf
VC
264 return r;
265 }
266
e167d7fd 267 *ret = f;
7a24f3bf
VC
268 return r;
269}
270
6431c7e2
VC
/* Tells whether the flag file /run/systemd/journal/flushed is present. */
static bool flushed_flag_is_set(void) {
        int rc;

        rc = access("/run/systemd/journal/flushed", F_OK);
        return rc >= 0;
}
274
105bdb46 275static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 276 bool flushed = false;
105bdb46
VC
277 const char *fn;
278 int r = 0;
279
280 if (!s->system_journal &&
281 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 282 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
283
284 /* If in auto mode: first try to create the machine
285 * path, but not the prefix.
286 *
287 * If in persistent mode: create /var/log/journal and
288 * the machine path */
289
290 if (s->storage == STORAGE_PERSISTENT)
291 (void) mkdir_p("/var/log/journal/", 0755);
292
293 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
294 (void) mkdir(fn, 0755);
295
296 fn = strjoina(fn, "/system.journal");
297 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
298 if (r >= 0) {
299 server_add_acls(s->system_journal, 0);
300 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
301 } else if (r < 0) {
302 if (r != -ENOENT && r != -EROFS)
303 log_warning_errno(r, "Failed to open system journal: %m");
304
305 r = 0;
306 }
929eeb54
VC
307
308 /* If the runtime journal is open, and we're post-flush, we're
309 * recovering from a failed system journal rotate (ENOSPC)
310 * for which the runtime journal was reopened.
311 *
312 * Perform an implicit flush to var, leaving the runtime
313 * journal closed, now that the system journal is back.
314 */
315 if (s->runtime_journal && flushed)
316 (void) server_flush_to_var(s);
105bdb46
VC
317 }
318
319 if (!s->runtime_journal &&
320 (s->storage != STORAGE_NONE)) {
321
322 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
323
324 if (s->system_journal) {
325
326 /* Try to open the runtime journal, but only
327 * if it already exists, so that we can flush
328 * it into the system journal */
329
330 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
331 if (r < 0) {
332 if (r != -ENOENT)
333 log_warning_errno(r, "Failed to open runtime journal: %m");
334
335 r = 0;
336 }
337
338 } else {
339
340 /* OK, we really need the runtime journal, so create
341 * it if necessary. */
342
343 (void) mkdir("/run/log", 0755);
344 (void) mkdir("/run/log/journal", 0755);
345 (void) mkdir_parents(fn, 0750);
346
347 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
348 if (r < 0)
349 return log_error_errno(r, "Failed to open runtime journal: %m");
350 }
351
352 if (s->runtime_journal) {
353 server_add_acls(s->runtime_journal, 0);
354 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
355 }
356 }
357
358 return r;
359}
360
d025f1e4 361static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 362 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
363 int r;
364 JournalFile *f;
365 sd_id128_t machine;
366
367 assert(s);
368
105bdb46
VC
369 /* A rotate that fails to create the new journal (ENOSPC) leaves the
370 * rotated journal as NULL. Unless we revisit opening, even after
371 * space is made available we'll continue to return NULL indefinitely.
372 *
373 * system_journal_open() is a noop if the journals are already open, so
374 * we can just call it here to recover from failed rotates (or anything
375 * else that's left the journals as NULL).
376 *
377 * Fixes https://github.com/systemd/systemd/issues/3968 */
378 (void) system_journal_open(s, false);
379
d025f1e4
ZJS
380 /* We split up user logs only on /var, not on /run. If the
381 * runtime file is open, we write to it exclusively, in order
382 * to guarantee proper order as soon as we flush /run to
383 * /var and close the runtime file. */
384
385 if (s->runtime_journal)
386 return s->runtime_journal;
387
61755fda 388 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
389 return s->system_journal;
390
391 r = sd_id128_get_machine(&machine);
392 if (r < 0)
393 return s->system_journal;
394
4a0b58c4 395 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
396 if (f)
397 return f;
398
de0671ee
ZJS
399 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
400 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
401 return s->system_journal;
402
43cf8388 403 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 404 /* Too many open? Then let's close one */
43cf8388 405 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 406 assert(f);
69a3a6fd 407 (void) journal_file_close(f);
d025f1e4
ZJS
408 }
409
089ed40b 410 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
d025f1e4
ZJS
411 if (r < 0)
412 return s->system_journal;
413
5c3bde3f 414 server_add_acls(f, uid);
d025f1e4 415
4a0b58c4 416 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 417 if (r < 0) {
69a3a6fd 418 (void) journal_file_close(f);
d025f1e4
ZJS
419 return s->system_journal;
420 }
421
422 return f;
423}
424
ea69bd41
LP
425static int do_rotate(
426 Server *s,
427 JournalFile **f,
428 const char* name,
429 bool seal,
430 uint32_t uid) {
431
fc55baee
ZJS
432 int r;
433 assert(s);
434
435 if (!*f)
436 return -EINVAL;
437
b58c888f 438 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
439 if (r < 0)
440 if (*f)
ea69bd41 441 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 442 else
ea69bd41 443 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 444 else
5c3bde3f 445 server_add_acls(*f, uid);
2678031a 446
fc55baee
ZJS
447 return r;
448}
449
d025f1e4
ZJS
450void server_rotate(Server *s) {
451 JournalFile *f;
452 void *k;
453 Iterator i;
454 int r;
455
456 log_debug("Rotating...");
457
8580d1f7
LP
458 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
459 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 460
43cf8388 461 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 462 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 463 if (r >= 0)
43cf8388 464 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
465 else if (!f)
466 /* Old file has been closed and deallocated */
43cf8388 467 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 468 }
b58c888f
VC
469
470 /* Perform any deferred closes which aren't still offlining. */
471 SET_FOREACH(f, s->deferred_closes, i)
472 if (!journal_file_is_offlining(f)) {
473 (void) set_remove(s->deferred_closes, f);
474 (void) journal_file_close(f);
475 }
d025f1e4
ZJS
476}
477
26687bf8
OS
478void server_sync(Server *s) {
479 JournalFile *f;
26687bf8
OS
480 Iterator i;
481 int r;
482
26687bf8 483 if (s->system_journal) {
ac2e41f5 484 r = journal_file_set_offline(s->system_journal, false);
26687bf8 485 if (r < 0)
65089b82 486 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
487 }
488
65c1d46b 489 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 490 r = journal_file_set_offline(f, false);
26687bf8 491 if (r < 0)
65089b82 492 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
493 }
494
f9a810be
LP
495 if (s->sync_event_source) {
496 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
497 if (r < 0)
da927ba9 498 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 499 }
26687bf8
OS
500
501 s->sync_scheduled = false;
502}
503
ea69bd41
LP
504static void do_vacuum(
505 Server *s,
ea69bd41 506 JournalFile *f,
8580d1f7
LP
507 JournalMetrics *metrics,
508 const char *path,
509 const char *name,
510 bool verbose,
511 bool patch_min_use) {
ea69bd41
LP
512
513 const char *p;
8580d1f7 514 uint64_t limit;
63c8666b
ZJS
515 int r;
516
8580d1f7
LP
517 assert(s);
518 assert(metrics);
519 assert(path);
520 assert(name);
521
63c8666b
ZJS
522 if (!f)
523 return;
524
8580d1f7
LP
525 p = strjoina(path, SERVER_MACHINE_ID(s));
526
527 limit = metrics->max_use;
528 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
529
530 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 531 if (r < 0 && r != -ENOENT)
8580d1f7 532 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
63c8666b
ZJS
533}
534
8580d1f7
LP
535int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
536 assert(s);
d025f1e4
ZJS
537
538 log_debug("Vacuuming...");
539
540 s->oldest_file_usec = 0;
541
8580d1f7
LP
542 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
543 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
d025f1e4 544
8580d1f7
LP
545 s->cached_space_limit = 0;
546 s->cached_space_available = 0;
547 s->cached_space_timestamp = 0;
d025f1e4 548
8580d1f7 549 return 0;
d025f1e4
ZJS
550}
551
0c24bb23
LP
552static void server_cache_machine_id(Server *s) {
553 sd_id128_t id;
554 int r;
555
556 assert(s);
557
558 r = sd_id128_get_machine(&id);
559 if (r < 0)
560 return;
561
562 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
563}
564
565static void server_cache_boot_id(Server *s) {
566 sd_id128_t id;
567 int r;
568
569 assert(s);
570
571 r = sd_id128_get_boot(&id);
572 if (r < 0)
573 return;
574
575 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
576}
577
578static void server_cache_hostname(Server *s) {
579 _cleanup_free_ char *t = NULL;
580 char *x;
581
582 assert(s);
583
584 t = gethostname_malloc();
585 if (!t)
586 return;
587
588 x = strappend("_HOSTNAME=", t);
589 if (!x)
590 return;
591
592 free(s->hostname_field);
593 s->hostname_field = x;
594}
595
8531ae70 596static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5
ZJS
597 switch(r) {
598 case -E2BIG: /* Hit configured limit */
599 case -EFBIG: /* Hit fs limit */
600 case -EDQUOT: /* Quota limit hit */
601 case -ENOSPC: /* Disk full */
d025f1e4 602 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5
ZJS
603 return true;
604 case -EIO: /* I/O error of some kind (mmap) */
605 log_warning("%s: IO error, rotating.", f->path);
606 return true;
607 case -EHOSTDOWN: /* Other machine */
d025f1e4 608 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5
ZJS
609 return true;
610 case -EBUSY: /* Unclean shutdown */
d025f1e4 611 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5
ZJS
612 return true;
613 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 614 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5
ZJS
615 return true;
616 case -EBADMSG: /* Corrupted */
617 case -ENODATA: /* Truncated */
618 case -ESHUTDOWN: /* Already archived */
d025f1e4 619 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5
ZJS
620 return true;
621 case -EIDRM: /* Journal file has been deleted */
2678031a 622 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5
ZJS
623 return true;
624 default:
d025f1e4 625 return false;
6e1045e5 626 }
d025f1e4
ZJS
627}
628
d07f7b9e 629static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
630 JournalFile *f;
631 bool vacuumed = false;
632 int r;
633
634 assert(s);
635 assert(iovec);
636 assert(n > 0);
637
638 f = find_journal(s, uid);
639 if (!f)
640 return;
641
642 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
643 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
644 server_rotate(s);
8580d1f7 645 server_vacuum(s, false, false);
d025f1e4
ZJS
646 vacuumed = true;
647
648 f = find_journal(s, uid);
649 if (!f)
650 return;
651 }
652
653 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 654 if (r >= 0) {
d07f7b9e 655 server_schedule_sync(s, priority);
d025f1e4 656 return;
26687bf8 657 }
d025f1e4
ZJS
658
659 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 660 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
661 return;
662 }
663
664 server_rotate(s);
8580d1f7 665 server_vacuum(s, false, false);
d025f1e4
ZJS
666
667 f = find_journal(s, uid);
668 if (!f)
669 return;
670
671 log_debug("Retrying write.");
672 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
673 if (r < 0)
674 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
675 else
d07f7b9e 676 server_schedule_sync(s, priority);
d025f1e4
ZJS
677}
678
4b58153d
LP
679static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
680 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
681 char *copy, ids[SD_ID128_STRING_MAX];
682 int r;
683
684 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
685 * on the cgroup path. */
686
687 r = cg_slice_to_path(slice, &slice_path);
688 if (r < 0)
689 return r;
690
691 escaped = cg_escape(unit);
692 if (!escaped)
693 return -ENOMEM;
694
695 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
696 if (!p)
697 return -ENOMEM;
698
699 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
700 if (r < 0)
701 return r;
702 if (r != 32)
703 return -EINVAL;
704 ids[32] = 0;
705
706 if (!id128_is_valid(ids))
707 return -EINVAL;
708
709 copy = strdup(ids);
710 if (!copy)
711 return -ENOMEM;
712
713 *ret = copy;
714 return 0;
715}
716
d025f1e4
ZJS
717static void dispatch_message_real(
718 Server *s,
719 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
720 const struct ucred *ucred,
721 const struct timeval *tv,
d025f1e4 722 const char *label, size_t label_len,
968f3196 723 const char *unit_id,
d07f7b9e 724 int priority,
968f3196 725 pid_t object_pid) {
d025f1e4 726
968f3196 727 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
728 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
729 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
730 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 731 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
732 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
733 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
734 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
735 uid_t object_uid;
736 gid_t object_gid;
968f3196 737 char *x;
d025f1e4 738 int r;
ae018d9b 739 char *t, *c;
82499507
LP
740 uid_t realuid = 0, owner = 0, journal_uid;
741 bool owner_valid = false;
ae018d9b 742#ifdef HAVE_AUDIT
968f3196
ZJS
743 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
744 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
745 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
746 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
747
748 uint32_t audit;
749 uid_t loginuid;
750#endif
d025f1e4
ZJS
751
752 assert(s);
753 assert(iovec);
754 assert(n > 0);
d473176a 755 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
756
757 if (ucred) {
d025f1e4
ZJS
758 realuid = ucred->uid;
759
de0671ee 760 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 761 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 762
de0671ee 763 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 764 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 765
de0671ee 766 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 767 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
768
769 r = get_process_comm(ucred->pid, &t);
770 if (r >= 0) {
63c372cb 771 x = strjoina("_COMM=", t);
d025f1e4 772 free(t);
968f3196 773 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
774 }
775
776 r = get_process_exe(ucred->pid, &t);
777 if (r >= 0) {
63c372cb 778 x = strjoina("_EXE=", t);
d025f1e4 779 free(t);
968f3196 780 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
781 }
782
9bdbc2e2 783 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 784 if (r >= 0) {
63c372cb 785 x = strjoina("_CMDLINE=", t);
d025f1e4 786 free(t);
3a832116
SL
787 IOVEC_SET_STRING(iovec[n++], x);
788 }
789
790 r = get_process_capeff(ucred->pid, &t);
791 if (r >= 0) {
63c372cb 792 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 793 free(t);
968f3196 794 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
795 }
796
0a20e3c1 797#ifdef HAVE_AUDIT
d025f1e4 798 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 799 if (r >= 0) {
de0671ee 800 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
801 IOVEC_SET_STRING(iovec[n++], audit_session);
802 }
d025f1e4
ZJS
803
804 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 805 if (r >= 0) {
de0671ee 806 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 807 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 808 }
ae018d9b 809#endif
d025f1e4 810
e9174f29 811 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 812 if (r >= 0) {
4b58153d 813 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
814 char *session = NULL;
815
63c372cb 816 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 817 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 818
ae018d9b
LP
819 r = cg_path_get_session(c, &t);
820 if (r >= 0) {
63c372cb 821 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 822 free(t);
d025f1e4 823 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
824 }
825
826 if (cg_path_get_owner_uid(c, &owner) >= 0) {
827 owner_valid = true;
d025f1e4 828
de0671ee 829 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 830 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 831 }
d025f1e4 832
4b58153d
LP
833 if (cg_path_get_unit(c, &raw_unit) >= 0) {
834 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
835 IOVEC_SET_STRING(iovec[n++], x);
836 } else if (unit_id && !session) {
63c372cb 837 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
838 IOVEC_SET_STRING(iovec[n++], x);
839 }
840
841 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 842 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 843 free(t);
968f3196 844 IOVEC_SET_STRING(iovec[n++], x);
19cace37 845 } else if (unit_id && session) {
63c372cb 846 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
847 IOVEC_SET_STRING(iovec[n++], x);
848 }
ae018d9b 849
4b58153d
LP
850 if (cg_path_get_slice(c, &raw_slice) >= 0) {
851 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
852 IOVEC_SET_STRING(iovec[n++], x);
853 }
854
d473176a
LP
855 if (cg_path_get_user_slice(c, &t) >= 0) {
856 x = strjoina("_SYSTEMD_USER_SLICE=", t);
857 free(t);
858 IOVEC_SET_STRING(iovec[n++], x);
859 }
860
4b58153d
LP
861 if (raw_slice && raw_unit) {
862 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
863 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
864 free(t);
865 IOVEC_SET_STRING(iovec[n++], x);
866 }
867 }
868
ae018d9b 869 free(c);
2d43b190 870 } else if (unit_id) {
63c372cb 871 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 872 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 873 }
d025f1e4 874
d025f1e4 875#ifdef HAVE_SELINUX
6355e756 876 if (mac_selinux_have()) {
d682b3a7 877 if (label) {
f8294e41 878 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 879
d682b3a7
LP
880 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
881 IOVEC_SET_STRING(iovec[n++], x);
882 } else {
2ed96880 883 char *con;
d025f1e4 884
d682b3a7 885 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 886 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 887
d682b3a7
LP
888 freecon(con);
889 IOVEC_SET_STRING(iovec[n++], x);
890 }
d025f1e4
ZJS
891 }
892 }
893#endif
894 }
968f3196
ZJS
895 assert(n <= m);
896
897 if (object_pid) {
898 r = get_process_uid(object_pid, &object_uid);
899 if (r >= 0) {
de0671ee 900 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
901 IOVEC_SET_STRING(iovec[n++], o_uid);
902 }
903
904 r = get_process_gid(object_pid, &object_gid);
905 if (r >= 0) {
de0671ee 906 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
907 IOVEC_SET_STRING(iovec[n++], o_gid);
908 }
909
910 r = get_process_comm(object_pid, &t);
911 if (r >= 0) {
63c372cb 912 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
913 free(t);
914 IOVEC_SET_STRING(iovec[n++], x);
915 }
916
917 r = get_process_exe(object_pid, &t);
918 if (r >= 0) {
63c372cb 919 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
920 free(t);
921 IOVEC_SET_STRING(iovec[n++], x);
922 }
923
924 r = get_process_cmdline(object_pid, 0, false, &t);
925 if (r >= 0) {
63c372cb 926 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
927 free(t);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
930
931#ifdef HAVE_AUDIT
932 r = audit_session_from_pid(object_pid, &audit);
933 if (r >= 0) {
de0671ee 934 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
935 IOVEC_SET_STRING(iovec[n++], o_audit_session);
936 }
937
938 r = audit_loginuid_from_pid(object_pid, &loginuid);
939 if (r >= 0) {
de0671ee 940 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
941 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
942 }
943#endif
944
e9174f29 945 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 946 if (r >= 0) {
63c372cb 947 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
948 IOVEC_SET_STRING(iovec[n++], x);
949
950 r = cg_path_get_session(c, &t);
951 if (r >= 0) {
63c372cb 952 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
953 free(t);
954 IOVEC_SET_STRING(iovec[n++], x);
955 }
956
957 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 958 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
959 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
960 }
961
962 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 963 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 964 free(t);
19cace37
LP
965 IOVEC_SET_STRING(iovec[n++], x);
966 }
967
968 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 969 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 970 free(t);
968f3196 971 IOVEC_SET_STRING(iovec[n++], x);
19cace37 972 }
968f3196 973
d473176a
LP
974 if (cg_path_get_slice(c, &t) >= 0) {
975 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
976 free(t);
977 IOVEC_SET_STRING(iovec[n++], x);
978 }
979
980 if (cg_path_get_user_slice(c, &t) >= 0) {
981 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
982 free(t);
983 IOVEC_SET_STRING(iovec[n++], x);
984 }
985
968f3196
ZJS
986 free(c);
987 }
988 }
989 assert(n <= m);
d025f1e4
ZJS
990
991 if (tv) {
398a50cd 992 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 993 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
994 }
995
996 /* Note that strictly speaking storing the boot id here is
997 * redundant since the entry includes this in-line
998 * anyway. However, we need this indexed, too. */
0c24bb23
LP
999 if (!isempty(s->boot_id_field))
1000 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1001
0c24bb23
LP
1002 if (!isempty(s->machine_id_field))
1003 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1004
0c24bb23
LP
1005 if (!isempty(s->hostname_field))
1006 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1007
1008 assert(n <= m);
1009
da499392 1010 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1011 /* Split up strictly by any UID */
759c945a 1012 journal_uid = realuid;
82499507 1013 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1014 /* Split up by login UIDs. We do this only if the
1015 * realuid is not root, in order not to accidentally
1016 * leak privileged information to the user that is
1017 * logged by a privileged process that is part of an
7517e174 1018 * unprivileged session. */
8a0889df 1019 journal_uid = owner;
da499392
KS
1020 else
1021 journal_uid = 0;
759c945a 1022
d07f7b9e 1023 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1024}
1025
1026void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1027 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1028 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1029 unsigned n = 0, m;
32917e33 1030 int r;
d025f1e4 1031 va_list ap;
b92bea5d 1032 struct ucred ucred = {};
d025f1e4
ZJS
1033
1034 assert(s);
1035 assert(format);
1036
4850d39a 1037 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1038 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1039 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1040
d025f1e4 1041 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1042 assert_cc(6 == LOG_INFO);
32917e33 1043 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1044
3bbaff3e 1045 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1046 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1047 IOVEC_SET_STRING(iovec[n++], mid);
1048 }
1049
8a03c9ef
ZJS
1050 m = n;
1051
1052 va_start(ap, format);
32917e33
ZJS
1053 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1054 /* Error handling below */
8a03c9ef
ZJS
1055 va_end(ap);
1056
d025f1e4
ZJS
1057 ucred.pid = getpid();
1058 ucred.uid = getuid();
1059 ucred.gid = getgid();
1060
32917e33
ZJS
1061 if (r >= 0)
1062 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1063
1064 while (m < n)
1065 free(iovec[m++].iov_base);
32917e33
ZJS
1066
1067 if (r < 0) {
1068 /* We failed to format the message. Emit a warning instead. */
1069 char buf[LINE_MAX];
1070
1071 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1072
1073 n = 3;
1074 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1075 IOVEC_SET_STRING(iovec[n++], buf);
1076 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1077 }
d025f1e4
ZJS
1078}
1079
1080void server_dispatch_message(
1081 Server *s,
1082 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1083 const struct ucred *ucred,
1084 const struct timeval *tv,
d025f1e4
ZJS
1085 const char *label, size_t label_len,
1086 const char *unit_id,
968f3196
ZJS
1087 int priority,
1088 pid_t object_pid) {
d025f1e4 1089
7027ff61 1090 int rl, r;
7fd1b19b 1091 _cleanup_free_ char *path = NULL;
8580d1f7 1092 uint64_t available = 0;
db91ea32 1093 char *c;
d025f1e4
ZJS
1094
1095 assert(s);
1096 assert(iovec || n == 0);
1097
1098 if (n == 0)
1099 return;
1100
1101 if (LOG_PRI(priority) > s->max_level_store)
1102 return;
1103
2f5df74a
HHPF
1104 /* Stop early in case the information will not be stored
1105 * in a journal. */
1106 if (s->storage == STORAGE_NONE)
1107 return;
1108
d025f1e4
ZJS
1109 if (!ucred)
1110 goto finish;
1111
e9174f29 1112 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1113 if (r < 0)
d025f1e4
ZJS
1114 goto finish;
1115
1116 /* example: /user/lennart/3/foobar
1117 * /system/dbus.service/foobar
1118 *
1119 * So let's cut of everything past the third /, since that is
1120 * where user directories start */
1121
1122 c = strchr(path, '/');
1123 if (c) {
1124 c = strchr(c+1, '/');
1125 if (c) {
1126 c = strchr(c+1, '/');
1127 if (c)
1128 *c = 0;
1129 }
1130 }
1131
8580d1f7
LP
1132 (void) determine_space(s, false, false, &available, NULL);
1133 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1134 if (rl == 0)
d025f1e4 1135 return;
d025f1e4
ZJS
1136
1137 /* Write a suppression message if we suppressed something */
1138 if (rl > 1)
db91ea32 1139 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1140 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1141 NULL);
d025f1e4
ZJS
1142
1143finish:
d07f7b9e 1144 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1145}
1146
d025f1e4 1147int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1148 sd_id128_t machine;
1149 sd_journal *j = NULL;
fbb63411
LP
1150 char ts[FORMAT_TIMESPAN_MAX];
1151 usec_t start;
1152 unsigned n = 0;
1153 int r;
d025f1e4
ZJS
1154
1155 assert(s);
1156
1157 if (s->storage != STORAGE_AUTO &&
1158 s->storage != STORAGE_PERSISTENT)
1159 return 0;
1160
1161 if (!s->runtime_journal)
1162 return 0;
1163
8580d1f7 1164 (void) system_journal_open(s, true);
d025f1e4
ZJS
1165
1166 if (!s->system_journal)
1167 return 0;
1168
1169 log_debug("Flushing to /var...");
1170
fbb63411
LP
1171 start = now(CLOCK_MONOTONIC);
1172
d025f1e4 1173 r = sd_id128_get_machine(&machine);
00a16861 1174 if (r < 0)
d025f1e4 1175 return r;
d025f1e4
ZJS
1176
1177 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1178 if (r < 0)
1179 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1180
93b73b06
LP
1181 sd_journal_set_data_threshold(j, 0);
1182
d025f1e4
ZJS
1183 SD_JOURNAL_FOREACH(j) {
1184 Object *o = NULL;
1185 JournalFile *f;
1186
1187 f = j->current_file;
1188 assert(f && f->current_offset > 0);
1189
fbb63411
LP
1190 n++;
1191
d025f1e4
ZJS
1192 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1193 if (r < 0) {
da927ba9 1194 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1195 goto finish;
1196 }
1197
1198 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1199 if (r >= 0)
1200 continue;
1201
1202 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1203 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1204 goto finish;
1205 }
1206
1207 server_rotate(s);
8580d1f7 1208 server_vacuum(s, false, false);
d025f1e4 1209
253f59df
LP
1210 if (!s->system_journal) {
1211 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1212 r = -EIO;
1213 goto finish;
1214 }
1215
d025f1e4
ZJS
1216 log_debug("Retrying write.");
1217 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1218 if (r < 0) {
da927ba9 1219 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1220 goto finish;
1221 }
1222 }
1223
804ae586
LP
1224 r = 0;
1225
d025f1e4
ZJS
1226finish:
1227 journal_file_post_change(s->system_journal);
1228
804ae586 1229 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1230
1231 if (r >= 0)
c6878637 1232 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1233
763c7aa2 1234 sd_journal_close(j);
d025f1e4 1235
8a03c9ef
ZJS
1236 server_driver_message(s, SD_ID128_NULL,
1237 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1238 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1239 n),
1240 NULL);
fbb63411 1241
d025f1e4
ZJS
1242 return r;
1243}
1244
8531ae70 1245int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1246 Server *s = userdata;
a315ac4e
LP
1247 struct ucred *ucred = NULL;
1248 struct timeval *tv = NULL;
1249 struct cmsghdr *cmsg;
1250 char *label = NULL;
1251 size_t label_len = 0, m;
1252 struct iovec iovec;
1253 ssize_t n;
1254 int *fds = NULL, v = 0;
1255 unsigned n_fds = 0;
1256
1257 union {
1258 struct cmsghdr cmsghdr;
1259
1260 /* We use NAME_MAX space for the SELinux label
1261 * here. The kernel currently enforces no
1262 * limit, but according to suggestions from
1263 * the SELinux people this will change and it
1264 * will probably be identical to NAME_MAX. For
1265 * now we use that, but this should be updated
1266 * one day when the final limit is known. */
1267 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1268 CMSG_SPACE(sizeof(struct timeval)) +
1269 CMSG_SPACE(sizeof(int)) + /* fd */
1270 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1271 } control = {};
1272
1273 union sockaddr_union sa = {};
1274
1275 struct msghdr msghdr = {
1276 .msg_iov = &iovec,
1277 .msg_iovlen = 1,
1278 .msg_control = &control,
1279 .msg_controllen = sizeof(control),
1280 .msg_name = &sa,
1281 .msg_namelen = sizeof(sa),
1282 };
f9a810be 1283
d025f1e4 1284 assert(s);
875c2e22 1285 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1286
1287 if (revents != EPOLLIN) {
1288 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1289 return -EIO;
1290 }
1291
a315ac4e
LP
1292 /* Try to get the right size, if we can. (Not all
1293 * sockets support SIOCINQ, hence we just try, but
1294 * don't rely on it. */
1295 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1296
a315ac4e
LP
1297 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1298 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1299 (size_t) LINE_MAX,
1300 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1301
a315ac4e
LP
1302 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1303 return log_oom();
875c2e22 1304
a315ac4e
LP
1305 iovec.iov_base = s->buffer;
1306 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1307
a315ac4e
LP
1308 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1309 if (n < 0) {
1310 if (errno == EINTR || errno == EAGAIN)
1311 return 0;
875c2e22 1312
a315ac4e
LP
1313 return log_error_errno(errno, "recvmsg() failed: %m");
1314 }
875c2e22 1315
a315ac4e
LP
1316 CMSG_FOREACH(cmsg, &msghdr) {
1317
1318 if (cmsg->cmsg_level == SOL_SOCKET &&
1319 cmsg->cmsg_type == SCM_CREDENTIALS &&
1320 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1321 ucred = (struct ucred*) CMSG_DATA(cmsg);
1322 else if (cmsg->cmsg_level == SOL_SOCKET &&
1323 cmsg->cmsg_type == SCM_SECURITY) {
1324 label = (char*) CMSG_DATA(cmsg);
1325 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1326 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1327 cmsg->cmsg_type == SO_TIMESTAMP &&
1328 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1329 tv = (struct timeval*) CMSG_DATA(cmsg);
1330 else if (cmsg->cmsg_level == SOL_SOCKET &&
1331 cmsg->cmsg_type == SCM_RIGHTS) {
1332 fds = (int*) CMSG_DATA(cmsg);
1333 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1334 }
a315ac4e 1335 }
d025f1e4 1336
a315ac4e
LP
1337 /* And a trailing NUL, just in case */
1338 s->buffer[n] = 0;
1339
1340 if (fd == s->syslog_fd) {
1341 if (n > 0 && n_fds == 0)
1342 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1343 else if (n_fds > 0)
1344 log_warning("Got file descriptors via syslog socket. Ignoring.");
1345
1346 } else if (fd == s->native_fd) {
1347 if (n > 0 && n_fds == 0)
1348 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1349 else if (n == 0 && n_fds == 1)
1350 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1351 else if (n_fds > 0)
1352 log_warning("Got too many file descriptors via native socket. Ignoring.");
1353
1354 } else {
1355 assert(fd == s->audit_fd);
1356
1357 if (n > 0 && n_fds == 0)
1358 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1359 else if (n_fds > 0)
1360 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1361 }
a315ac4e
LP
1362
1363 close_many(fds, n_fds);
1364 return 0;
f9a810be 1365}
d025f1e4 1366
f9a810be
LP
1367static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1368 Server *s = userdata;
33d52ab9 1369 int r;
d025f1e4 1370
f9a810be 1371 assert(s);
d025f1e4 1372
94b65516 1373 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1374
929eeb54 1375 (void) server_flush_to_var(s);
f9a810be 1376 server_sync(s);
8580d1f7 1377 server_vacuum(s, false, false);
d025f1e4 1378
33d52ab9
LP
1379 r = touch("/run/systemd/journal/flushed");
1380 if (r < 0)
1381 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1382
f9a810be
LP
1383 return 0;
1384}
d025f1e4 1385
f9a810be
LP
1386static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1387 Server *s = userdata;
33d52ab9 1388 int r;
d025f1e4 1389
f9a810be 1390 assert(s);
d025f1e4 1391
94b65516 1392 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1393 server_rotate(s);
8580d1f7 1394 server_vacuum(s, true, true);
d025f1e4 1395
dbd6e31c 1396 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1397 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1398 if (r < 0)
1399 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1400
f9a810be
LP
1401 return 0;
1402}
d025f1e4 1403
f9a810be
LP
1404static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1405 Server *s = userdata;
d025f1e4 1406
f9a810be 1407 assert(s);
d025f1e4 1408
4daf54a8 1409 log_received_signal(LOG_INFO, si);
d025f1e4 1410
6203e07a 1411 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1412 return 0;
1413}
1414
94b65516
LP
1415static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1416 Server *s = userdata;
33d52ab9 1417 int r;
94b65516
LP
1418
1419 assert(s);
1420
1421 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1422
1423 server_sync(s);
1424
1425 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1426 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1427 if (r < 0)
1428 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1429
1430 return 0;
1431}
1432
f9a810be 1433static int setup_signals(Server *s) {
f9a810be 1434 int r;
d025f1e4
ZJS
1435
1436 assert(s);
1437
94b65516 1438 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1439
151b9b96 1440 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1441 if (r < 0)
1442 return r;
1443
151b9b96 1444 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1445 if (r < 0)
1446 return r;
d025f1e4 1447
151b9b96 1448 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1449 if (r < 0)
1450 return r;
d025f1e4 1451
b374689c
LP
1452 /* Let's process SIGTERM late, so that we flush all queued
1453 * messages to disk before we exit */
1454 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1455 if (r < 0)
1456 return r;
1457
1458 /* When journald is invoked on the terminal (when debugging),
1459 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1460 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1461 if (r < 0)
1462 return r;
d025f1e4 1463
b374689c
LP
1464 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1465 if (r < 0)
1466 return r;
1467
94b65516
LP
1468 /* SIGRTMIN+1 causes an immediate sync. We process this very
1469 * late, so that everything else queued at this point is
1470 * really written to disk. Clients can watch
1471 * /run/systemd/journal/synced with inotify until its mtime
1472 * changes to see when a sync happened. */
1473 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1474 if (r < 0)
1475 return r;
1476
1477 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1478 if (r < 0)
1479 return r;
1480
d025f1e4
ZJS
1481 return 0;
1482}
1483
1484static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1485 _cleanup_free_ char *line = NULL;
d581d9d9 1486 const char *p;
74df0fca 1487 int r;
d025f1e4 1488
74df0fca 1489 r = proc_cmdline(&line);
b5884878 1490 if (r < 0) {
da927ba9 1491 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1492 return 0;
b5884878 1493 }
d025f1e4 1494
d581d9d9 1495 p = line;
9ed794a3 1496 for (;;) {
ff82c36c 1497 _cleanup_free_ char *word = NULL;
d025f1e4 1498
d581d9d9
SS
1499 r = extract_first_word(&p, &word, NULL, 0);
1500 if (r < 0)
1501 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1502
1503 if (r == 0)
1504 break;
d025f1e4
ZJS
1505
1506 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1507 r = parse_boolean(word + 35);
1508 if (r < 0)
1509 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1510 else
1511 s->forward_to_syslog = r;
1512 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1513 r = parse_boolean(word + 33);
1514 if (r < 0)
1515 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1516 else
1517 s->forward_to_kmsg = r;
1518 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1519 r = parse_boolean(word + 36);
1520 if (r < 0)
1521 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1522 else
1523 s->forward_to_console = r;
40b71e89
ST
1524 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1525 r = parse_boolean(word + 33);
1526 if (r < 0)
1527 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1528 else
1529 s->forward_to_wall = r;
d025f1e4
ZJS
1530 } else if (startswith(word, "systemd.journald"))
1531 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1532 }
1533
804ae586 1534 /* do not warn about state here, since probably systemd already did */
db91ea32 1535 return 0;
d025f1e4
ZJS
1536}
1537
1538static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1539 assert(s);
1540
43688c49 1541 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1542 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1543 "Journal\0",
1544 config_item_perf_lookup, journald_gperf_lookup,
1545 false, s);
d025f1e4
ZJS
1546}
1547
f9a810be
LP
1548static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1549 Server *s = userdata;
26687bf8
OS
1550
1551 assert(s);
1552
f9a810be 1553 server_sync(s);
26687bf8
OS
1554 return 0;
1555}
1556
d07f7b9e 1557int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1558 int r;
1559
26687bf8
OS
1560 assert(s);
1561
d07f7b9e
LP
1562 if (priority <= LOG_CRIT) {
1563 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1564 server_sync(s);
1565 return 0;
1566 }
1567
26687bf8
OS
1568 if (s->sync_scheduled)
1569 return 0;
1570
f9a810be
LP
1571 if (s->sync_interval_usec > 0) {
1572 usec_t when;
ca267016 1573
6a0f1f6d 1574 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1575 if (r < 0)
1576 return r;
26687bf8 1577
f9a810be
LP
1578 when += s->sync_interval_usec;
1579
1580 if (!s->sync_event_source) {
6a0f1f6d
LP
1581 r = sd_event_add_time(
1582 s->event,
1583 &s->sync_event_source,
1584 CLOCK_MONOTONIC,
1585 when, 0,
1586 server_dispatch_sync, s);
f9a810be
LP
1587 if (r < 0)
1588 return r;
1589
1590 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1591 } else {
1592 r = sd_event_source_set_time(s->sync_event_source, when);
1593 if (r < 0)
1594 return r;
1595
1596 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1597 }
26687bf8 1598 if (r < 0)
f9a810be 1599 return r;
26687bf8 1600
f9a810be
LP
1601 s->sync_scheduled = true;
1602 }
26687bf8
OS
1603
1604 return 0;
1605}
1606
0c24bb23
LP
1607static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1608 Server *s = userdata;
1609
1610 assert(s);
1611
1612 server_cache_hostname(s);
1613 return 0;
1614}
1615
1616static int server_open_hostname(Server *s) {
1617 int r;
1618
1619 assert(s);
1620
1621 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1622 if (s->hostname_fd < 0)
1623 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1624
151b9b96 1625 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1626 if (r < 0) {
28def94c
DR
1627 /* kernels prior to 3.2 don't support polling this file. Ignore
1628 * the failure. */
1629 if (r == -EPERM) {
e53fc357 1630 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1631 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1632 return 0;
1633 }
1634
23bbb0de 1635 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1636 }
1637
1638 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1639 if (r < 0)
1640 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1641
1642 return 0;
1643}
1644
e22aa3d3
LP
1645static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1646 Server *s = userdata;
1647 int r;
1648
1649 assert(s);
1650 assert(s->notify_event_source == es);
1651 assert(s->notify_fd == fd);
1652
e22aa3d3 1653 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1654 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1655 * READY=1 event or an stdout stream event. If there's nothing
1656 * to write anymore, turn our event source off. The next time
1657 * there's something to send it will be turned on again. */
e22aa3d3
LP
1658
1659 if (!s->sent_notify_ready) {
1660 static const char p[] =
1661 "READY=1\n"
1662 "STATUS=Processing requests...";
1663 ssize_t l;
1664
1665 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1666 if (l < 0) {
1667 if (errno == EAGAIN)
1668 return 0;
1669
1670 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1671 }
1672
1673 s->sent_notify_ready = true;
1674 log_debug("Sent READY=1 notification.");
1675
119e9655
LP
1676 } else if (s->send_watchdog) {
1677
1678 static const char p[] =
1679 "WATCHDOG=1";
1680
1681 ssize_t l;
1682
1683 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1684 if (l < 0) {
1685 if (errno == EAGAIN)
1686 return 0;
1687
1688 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1689 }
1690
1691 s->send_watchdog = false;
1692 log_debug("Sent WATCHDOG=1 notification.");
1693
e22aa3d3
LP
1694 } else if (s->stdout_streams_notify_queue)
1695 /* Dispatch one stream notification event */
1696 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1697
61233823 1698 /* Leave us enabled if there's still more to do. */
119e9655 1699 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1700 return 0;
1701
1702 /* There was nothing to do anymore, let's turn ourselves off. */
1703 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1704 if (r < 0)
1705 return log_error_errno(r, "Failed to turn off notify event source: %m");
1706
1707 return 0;
1708}
1709
119e9655
LP
1710static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1711 Server *s = userdata;
1712 int r;
1713
1714 assert(s);
1715
1716 s->send_watchdog = true;
1717
1718 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1719 if (r < 0)
1720 log_warning_errno(r, "Failed to turn on notify event source: %m");
1721
1722 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1723 if (r < 0)
1724 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1725
1726 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1727 if (r < 0)
1728 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1729
1730 return 0;
1731}
1732
e22aa3d3
LP
1733static int server_connect_notify(Server *s) {
1734 union sockaddr_union sa = {
1735 .un.sun_family = AF_UNIX,
1736 };
1737 const char *e;
1738 int r;
1739
1740 assert(s);
1741 assert(s->notify_fd < 0);
1742 assert(!s->notify_event_source);
1743
1744 /*
1745 So here's the problem: we'd like to send notification
1746 messages to PID 1, but we cannot do that via sd_notify(),
1747 since that's synchronous, and we might end up blocking on
1748 it. Specifically: given that PID 1 might block on
1749 dbus-daemon during IPC, and dbus-daemon is logging to us,
1750 and might hence block on us, we might end up in a deadlock
ccddd104 1751 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1752 generating a full blocking circle. To avoid this, let's
1753 create a non-blocking socket, and connect it to the
1754 notification socket, and then wait for POLLOUT before we
1755 send anything. This should efficiently avoid any deadlocks,
1756 as we'll never block on PID 1, hence PID 1 can safely block
1757 on dbus-daemon which can safely block on us again.
1758
1759 Don't think that this issue is real? It is, see:
1760 https://github.com/systemd/systemd/issues/1505
1761 */
1762
1763 e = getenv("NOTIFY_SOCKET");
1764 if (!e)
1765 return 0;
1766
1767 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1768 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1769 return -EINVAL;
1770 }
1771
1772 if (strlen(e) > sizeof(sa.un.sun_path)) {
1773 log_error("NOTIFY_SOCKET path too long: %s", e);
1774 return -EINVAL;
1775 }
1776
1777 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1778 if (s->notify_fd < 0)
1779 return log_error_errno(errno, "Failed to create notify socket: %m");
1780
1781 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1782
1783 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1784 if (sa.un.sun_path[0] == '@')
1785 sa.un.sun_path[0] = 0;
1786
fc2fffe7 1787 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1788 if (r < 0)
1789 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1790
1791 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1792 if (r < 0)
1793 return log_error_errno(r, "Failed to watch notification socket: %m");
1794
119e9655
LP
1795 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1796 s->send_watchdog = true;
1797
4de2402b 1798 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1799 if (r < 0)
1800 return log_error_errno(r, "Failed to add watchdog time event: %m");
1801 }
1802
e22aa3d3
LP
1803 /* This should fire pretty soon, which we'll use to send the
1804 * READY=1 event. */
1805
1806 return 0;
1807}
1808
d025f1e4 1809int server_init(Server *s) {
13790add 1810 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1811 int n, r, fd;
7d18d348 1812 bool no_sockets;
d025f1e4
ZJS
1813
1814 assert(s);
1815
1816 zero(*s);
e22aa3d3 1817 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1818 s->compress = true;
1819 s->seal = true;
1820
119e9655
LP
1821 s->watchdog_usec = USEC_INFINITY;
1822
26687bf8
OS
1823 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1824 s->sync_scheduled = false;
1825
d025f1e4
ZJS
1826 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1827 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1828
40b71e89 1829 s->forward_to_wall = true;
d025f1e4 1830
e150e820
MB
1831 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1832
d025f1e4
ZJS
1833 s->max_level_store = LOG_DEBUG;
1834 s->max_level_syslog = LOG_DEBUG;
1835 s->max_level_kmsg = LOG_NOTICE;
1836 s->max_level_console = LOG_INFO;
40b71e89 1837 s->max_level_wall = LOG_EMERG;
d025f1e4 1838
8580d1f7
LP
1839 journal_reset_metrics(&s->system_metrics);
1840 journal_reset_metrics(&s->runtime_metrics);
d025f1e4
ZJS
1841
1842 server_parse_config_file(s);
1843 server_parse_proc_cmdline(s);
8580d1f7 1844
d288f79f 1845 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1846 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1847 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1848 s->rate_limit_interval = s->rate_limit_burst = 0;
1849 }
d025f1e4 1850
8580d1f7 1851 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1852
43cf8388 1853 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1854 if (!s->user_journals)
1855 return log_oom();
1856
1857 s->mmap = mmap_cache_new();
1858 if (!s->mmap)
1859 return log_oom();
1860
b58c888f
VC
1861 s->deferred_closes = set_new(NULL);
1862 if (!s->deferred_closes)
1863 return log_oom();
1864
f9a810be 1865 r = sd_event_default(&s->event);
23bbb0de
MS
1866 if (r < 0)
1867 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1868
1869 n = sd_listen_fds(true);
23bbb0de
MS
1870 if (n < 0)
1871 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1872
1873 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1874
1875 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1876
1877 if (s->native_fd >= 0) {
1878 log_error("Too many native sockets passed.");
1879 return -EINVAL;
1880 }
1881
1882 s->native_fd = fd;
1883
1884 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1885
1886 if (s->stdout_fd >= 0) {
1887 log_error("Too many stdout sockets passed.");
1888 return -EINVAL;
1889 }
1890
1891 s->stdout_fd = fd;
1892
03ee5c38
LP
1893 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1894 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1895
1896 if (s->syslog_fd >= 0) {
1897 log_error("Too many /dev/log sockets passed.");
1898 return -EINVAL;
1899 }
1900
1901 s->syslog_fd = fd;
1902
875c2e22
LP
1903 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1904
1905 if (s->audit_fd >= 0) {
1906 log_error("Too many audit sockets passed.");
1907 return -EINVAL;
1908 }
1909
1910 s->audit_fd = fd;
1911
4ec3cd73 1912 } else {
4ec3cd73 1913
13790add
LP
1914 if (!fds) {
1915 fds = fdset_new();
1916 if (!fds)
1917 return log_oom();
1918 }
4ec3cd73 1919
13790add
LP
1920 r = fdset_put(fds, fd);
1921 if (r < 0)
1922 return log_oom();
4ec3cd73 1923 }
d025f1e4
ZJS
1924 }
1925
15d91bff
ZJS
1926 /* Try to restore streams, but don't bother if this fails */
1927 (void) server_restore_streams(s, fds);
d025f1e4 1928
13790add
LP
1929 if (fdset_size(fds) > 0) {
1930 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1931 fds = fdset_free(fds);
1932 }
1933
7d18d348
ZJS
1934 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1935
1936 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1937
1938 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1939 r = server_open_stdout_socket(s);
1940 if (r < 0)
1941 return r;
1942
37b7affe 1943 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1944 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1945 if (r < 0)
1946 return r;
1947
37b7affe 1948 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1949 r = server_open_native_socket(s);
d025f1e4
ZJS
1950 if (r < 0)
1951 return r;
1952
37b7affe 1953 /* /dev/kmsg */
d025f1e4
ZJS
1954 r = server_open_dev_kmsg(s);
1955 if (r < 0)
1956 return r;
1957
7d18d348
ZJS
1958 /* Unless we got *some* sockets and not audit, open audit socket */
1959 if (s->audit_fd >= 0 || no_sockets) {
1960 r = server_open_audit(s);
1961 if (r < 0)
1962 return r;
1963 }
875c2e22 1964
d025f1e4
ZJS
1965 r = server_open_kernel_seqnum(s);
1966 if (r < 0)
1967 return r;
1968
0c24bb23
LP
1969 r = server_open_hostname(s);
1970 if (r < 0)
1971 return r;
1972
f9a810be 1973 r = setup_signals(s);
d025f1e4
ZJS
1974 if (r < 0)
1975 return r;
1976
1977 s->udev = udev_new();
1978 if (!s->udev)
1979 return -ENOMEM;
1980
f9a810be 1981 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1982 if (!s->rate_limit)
1983 return -ENOMEM;
1984
e9174f29
LP
1985 r = cg_get_root_path(&s->cgroup_root);
1986 if (r < 0)
1987 return r;
1988
0c24bb23
LP
1989 server_cache_hostname(s);
1990 server_cache_boot_id(s);
1991 server_cache_machine_id(s);
1992
e22aa3d3
LP
1993 (void) server_connect_notify(s);
1994
804ae586 1995 return system_journal_open(s, false);
d025f1e4
ZJS
1996}
1997
1998void server_maybe_append_tags(Server *s) {
1999#ifdef HAVE_GCRYPT
2000 JournalFile *f;
2001 Iterator i;
2002 usec_t n;
2003
2004 n = now(CLOCK_REALTIME);
2005
2006 if (s->system_journal)
2007 journal_file_maybe_append_tag(s->system_journal, n);
2008
43cf8388 2009 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2010 journal_file_maybe_append_tag(f, n);
2011#endif
2012}
2013
2014void server_done(Server *s) {
2015 JournalFile *f;
2016 assert(s);
2017
b58c888f
VC
2018 if (s->deferred_closes) {
2019 journal_file_close_set(s->deferred_closes);
2020 set_free(s->deferred_closes);
2021 }
2022
d025f1e4
ZJS
2023 while (s->stdout_streams)
2024 stdout_stream_free(s->stdout_streams);
2025
2026 if (s->system_journal)
69a3a6fd 2027 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2028
2029 if (s->runtime_journal)
69a3a6fd 2030 (void) journal_file_close(s->runtime_journal);
d025f1e4 2031
43cf8388 2032 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2033 (void) journal_file_close(f);
d025f1e4 2034
43cf8388 2035 ordered_hashmap_free(s->user_journals);
d025f1e4 2036
f9a810be
LP
2037 sd_event_source_unref(s->syslog_event_source);
2038 sd_event_source_unref(s->native_event_source);
2039 sd_event_source_unref(s->stdout_event_source);
2040 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2041 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2042 sd_event_source_unref(s->sync_event_source);
2043 sd_event_source_unref(s->sigusr1_event_source);
2044 sd_event_source_unref(s->sigusr2_event_source);
2045 sd_event_source_unref(s->sigterm_event_source);
2046 sd_event_source_unref(s->sigint_event_source);
94b65516 2047 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2048 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2049 sd_event_source_unref(s->notify_event_source);
119e9655 2050 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2051 sd_event_unref(s->event);
d025f1e4 2052
03e334a1
LP
2053 safe_close(s->syslog_fd);
2054 safe_close(s->native_fd);
2055 safe_close(s->stdout_fd);
2056 safe_close(s->dev_kmsg_fd);
875c2e22 2057 safe_close(s->audit_fd);
03e334a1 2058 safe_close(s->hostname_fd);
e22aa3d3 2059 safe_close(s->notify_fd);
0c24bb23 2060
d025f1e4
ZJS
2061 if (s->rate_limit)
2062 journal_rate_limit_free(s->rate_limit);
2063
2064 if (s->kernel_seqnum)
2065 munmap(s->kernel_seqnum, sizeof(uint64_t));
2066
2067 free(s->buffer);
2068 free(s->tty_path);
e9174f29 2069 free(s->cgroup_root);
99d0966e 2070 free(s->hostname_field);
d025f1e4
ZJS
2071
2072 if (s->mmap)
2073 mmap_cache_unref(s->mmap);
2074
3e044c49 2075 udev_unref(s->udev);
d025f1e4 2076}
8580d1f7
LP
2077
2078static const char* const storage_table[_STORAGE_MAX] = {
2079 [STORAGE_AUTO] = "auto",
2080 [STORAGE_VOLATILE] = "volatile",
2081 [STORAGE_PERSISTENT] = "persistent",
2082 [STORAGE_NONE] = "none"
2083};
2084
2085DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2086DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2087
2088static const char* const split_mode_table[_SPLIT_MAX] = {
2089 [SPLIT_LOGIN] = "login",
2090 [SPLIT_UID] = "uid",
2091 [SPLIT_NONE] = "none",
2092};
2093
2094DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2095DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");