]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journald: use the event loop dispatch timestamp for journal entries
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
4b58153d 47#include "id128-util.h"
afc5dbf3 48#include "io-util.h"
8580d1f7
LP
49#include "journal-authenticate.h"
50#include "journal-file.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
8580d1f7 53#include "journald-audit.h"
d025f1e4 54#include "journald-kmsg.h"
d025f1e4 55#include "journald-native.h"
8580d1f7 56#include "journald-rate-limit.h"
3ffd4af2 57#include "journald-server.h"
8580d1f7
LP
58#include "journald-stream.h"
59#include "journald-syslog.h"
4b58153d 60#include "log.h"
07630cea
LP
61#include "missing.h"
62#include "mkdir.h"
6bedfcbb 63#include "parse-util.h"
4e731273 64#include "proc-cmdline.h"
07630cea
LP
65#include "process-util.h"
66#include "rm-rf.h"
67#include "selinux-util.h"
68#include "signal-util.h"
69#include "socket-util.h"
32917e33 70#include "stdio-util.h"
8b43440b 71#include "string-table.h"
07630cea 72#include "string-util.h"
4a0b58c4 73#include "user-util.h"
d025f1e4 74
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at once; find_journal() closes entries taken
 * from the front of the user journal map until the count drops below this value. */
75#define USER_JOURNALS_MAX 1024
76
/* Built-in defaults; NOTE(review): presumably applied when the configuration does not override them — the
 * code that consumes them is not part of this section. */
26687bf8 77#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
78#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
79#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 80#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 81
/* How long the disk space figures cached by determine_space_for() are reused before the journal directory is
 * scanned again. */
8580d1f7 82#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 83
e22aa3d3
LP
/* NOTE(review): judging by the name this is a socket send buffer size in bytes (8 MiB) for the notification
 * socket — confirm at the place where it is used. */
84#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
85
7a24f3bf
VC
86/* The period to insert between posting changes for coalescing */
87#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
88
8580d1f7
LP
89static int determine_space_for(
90 Server *s,
91 JournalMetrics *metrics,
92 const char *path,
93 const char *name,
94 bool verbose,
95 bool patch_min_use,
96 uint64_t *available,
97 uint64_t *limit) {
98
99 uint64_t sum = 0, ss_avail, avail;
7fd1b19b 100 _cleanup_closedir_ DIR *d = NULL;
8580d1f7
LP
101 struct dirent *de;
102 struct statvfs ss;
103 const char *p;
d025f1e4 104 usec_t ts;
d025f1e4 105
8580d1f7
LP
106 assert(s);
107 assert(metrics);
108 assert(path);
109 assert(name);
d025f1e4 110
8580d1f7 111 ts = now(CLOCK_MONOTONIC);
d025f1e4 112
8580d1f7 113 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 114
8580d1f7
LP
115 if (available)
116 *available = s->cached_space_available;
117 if (limit)
118 *limit = s->cached_space_limit;
d025f1e4 119
d025f1e4 120 return 0;
8580d1f7 121 }
d025f1e4 122
8580d1f7 123 p = strjoina(path, SERVER_MACHINE_ID(s));
d025f1e4 124 d = opendir(p);
d025f1e4 125 if (!d)
8580d1f7 126 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
d025f1e4
ZJS
127
128 if (fstatvfs(dirfd(d), &ss) < 0)
8580d1f7 129 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
d025f1e4 130
8580d1f7 131 FOREACH_DIRENT_ALL(de, d, break) {
d025f1e4 132 struct stat st;
d025f1e4
ZJS
133
134 if (!endswith(de->d_name, ".journal") &&
135 !endswith(de->d_name, ".journal~"))
136 continue;
137
8580d1f7
LP
138 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
139 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
d025f1e4 140 continue;
8580d1f7 141 }
d025f1e4
ZJS
142
143 if (!S_ISREG(st.st_mode))
144 continue;
145
146 sum += (uint64_t) st.st_blocks * 512UL;
147 }
148
8a03c9ef 149 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
150 * current usage on disk. We do this when starting up and
151 * first opening the journal files. This way sudden spikes in
152 * disk usage will not cause journald to vacuum files without
153 * bounds. Note that this means that only a restart of
154 * journald will make it reset this value. */
d025f1e4 155
8580d1f7
LP
156 if (patch_min_use)
157 metrics->min_use = MAX(metrics->min_use, sum);
348ced90 158
8580d1f7
LP
159 ss_avail = ss.f_bsize * ss.f_bavail;
160 avail = LESS_BY(ss_avail, metrics->keep_free);
348ced90 161
8580d1f7
LP
162 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
163 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
164 s->cached_space_timestamp = ts;
d025f1e4 165
670b110c
ZJS
166 if (verbose) {
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
8580d1f7 168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
282c5c4e
ZJS
169 format_bytes(fb1, sizeof(fb1), sum);
170 format_bytes(fb2, sizeof(fb2), metrics->max_use);
171 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
172 format_bytes(fb4, sizeof(fb4), ss_avail);
173 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
174 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
670b110c
ZJS
175
176 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
282c5c4e
ZJS
177 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
178 name, path, fb1, fb5, fb6),
179 "JOURNAL_NAME=%s", name,
180 "JOURNAL_PATH=%s", path,
181 "CURRENT_USE=%"PRIu64, sum,
182 "CURRENT_USE_PRETTY=%s", fb1,
183 "MAX_USE=%"PRIu64, metrics->max_use,
184 "MAX_USE_PRETTY=%s", fb2,
185 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
186 "DISK_KEEP_FREE_PRETTY=%s", fb3,
187 "DISK_AVAILABLE=%"PRIu64, ss_avail,
188 "DISK_AVAILABLE_PRETTY=%s", fb4,
189 "LIMIT=%"PRIu64, s->cached_space_limit,
190 "LIMIT_PRETTY=%s", fb5,
191 "AVAILABLE=%"PRIu64, s->cached_space_available,
192 "AVAILABLE_PRETTY=%s", fb6,
8a03c9ef 193 NULL);
8580d1f7
LP
194 }
195
196 if (available)
197 *available = s->cached_space_available;
198 if (limit)
199 *limit = s->cached_space_limit;
200
201 return 1;
202}
203
204static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
205 JournalMetrics *metrics;
206 const char *path, *name;
207
208 assert(s);
209
210 if (s->system_journal) {
211 path = "/var/log/journal/";
212 metrics = &s->system_metrics;
213 name = "System journal";
214 } else {
215 path = "/run/log/journal/";
216 metrics = &s->runtime_metrics;
217 name = "Runtime journal";
670b110c
ZJS
218 }
219
8580d1f7 220 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
d025f1e4
ZJS
221}
222
5c3bde3f 223static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 224#ifdef HAVE_ACL
5c3bde3f 225 int r;
d025f1e4 226#endif
d025f1e4
ZJS
227 assert(f);
228
d025f1e4 229#ifdef HAVE_ACL
34c10968 230 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
231 return;
232
5c3bde3f
ZJS
233 r = add_acls_for_user(f->fd, uid);
234 if (r < 0)
235 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
236#endif
237}
238
7a24f3bf
VC
239static int open_journal(
240 Server *s,
241 bool reliably,
242 const char *fname,
243 int flags,
244 bool seal,
245 JournalMetrics *metrics,
7a24f3bf
VC
246 JournalFile **ret) {
247 int r;
e167d7fd 248 JournalFile *f;
7a24f3bf
VC
249
250 assert(s);
251 assert(fname);
252 assert(ret);
253
254 if (reliably)
b58c888f 255 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 256 else
5d1ce257 257 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
258 if (r < 0)
259 return r;
260
e167d7fd 261 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 262 if (r < 0) {
69a3a6fd 263 (void) journal_file_close(f);
7a24f3bf
VC
264 return r;
265 }
266
e167d7fd 267 *ret = f;
7a24f3bf
VC
268 return r;
269}
270
6431c7e2
VC
/* Tells whether the flag file /run/systemd/journal/flushed exists, i.e. whether access() with F_OK succeeds
 * on it. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
274
105bdb46 275static int system_journal_open(Server *s, bool flush_requested) {
929eeb54 276 bool flushed = false;
105bdb46
VC
277 const char *fn;
278 int r = 0;
279
280 if (!s->system_journal &&
281 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
6431c7e2 282 (flush_requested || (flushed = flushed_flag_is_set()))) {
105bdb46
VC
283
284 /* If in auto mode: first try to create the machine
285 * path, but not the prefix.
286 *
287 * If in persistent mode: create /var/log/journal and
288 * the machine path */
289
290 if (s->storage == STORAGE_PERSISTENT)
291 (void) mkdir_p("/var/log/journal/", 0755);
292
293 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
294 (void) mkdir(fn, 0755);
295
296 fn = strjoina(fn, "/system.journal");
297 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
298 if (r >= 0) {
299 server_add_acls(s->system_journal, 0);
300 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
301 } else if (r < 0) {
302 if (r != -ENOENT && r != -EROFS)
303 log_warning_errno(r, "Failed to open system journal: %m");
304
305 r = 0;
306 }
929eeb54
VC
307
308 /* If the runtime journal is open, and we're post-flush, we're
309 * recovering from a failed system journal rotate (ENOSPC)
310 * for which the runtime journal was reopened.
311 *
312 * Perform an implicit flush to var, leaving the runtime
313 * journal closed, now that the system journal is back.
314 */
315 if (s->runtime_journal && flushed)
316 (void) server_flush_to_var(s);
105bdb46
VC
317 }
318
319 if (!s->runtime_journal &&
320 (s->storage != STORAGE_NONE)) {
321
322 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
323
324 if (s->system_journal) {
325
326 /* Try to open the runtime journal, but only
327 * if it already exists, so that we can flush
328 * it into the system journal */
329
330 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
331 if (r < 0) {
332 if (r != -ENOENT)
333 log_warning_errno(r, "Failed to open runtime journal: %m");
334
335 r = 0;
336 }
337
338 } else {
339
340 /* OK, we really need the runtime journal, so create
341 * it if necessary. */
342
343 (void) mkdir("/run/log", 0755);
344 (void) mkdir("/run/log/journal", 0755);
345 (void) mkdir_parents(fn, 0750);
346
347 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
348 if (r < 0)
349 return log_error_errno(r, "Failed to open runtime journal: %m");
350 }
351
352 if (s->runtime_journal) {
353 server_add_acls(s->runtime_journal, 0);
354 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
355 }
356 }
357
358 return r;
359}
360
d025f1e4 361static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 362 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
363 int r;
364 JournalFile *f;
365 sd_id128_t machine;
366
367 assert(s);
368
105bdb46
VC
369 /* A rotate that fails to create the new journal (ENOSPC) leaves the
370 * rotated journal as NULL. Unless we revisit opening, even after
371 * space is made available we'll continue to return NULL indefinitely.
372 *
373 * system_journal_open() is a noop if the journals are already open, so
374 * we can just call it here to recover from failed rotates (or anything
375 * else that's left the journals as NULL).
376 *
377 * Fixes https://github.com/systemd/systemd/issues/3968 */
378 (void) system_journal_open(s, false);
379
d025f1e4
ZJS
380 /* We split up user logs only on /var, not on /run. If the
381 * runtime file is open, we write to it exclusively, in order
382 * to guarantee proper order as soon as we flush /run to
383 * /var and close the runtime file. */
384
385 if (s->runtime_journal)
386 return s->runtime_journal;
387
61755fda 388 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
d025f1e4
ZJS
389 return s->system_journal;
390
391 r = sd_id128_get_machine(&machine);
392 if (r < 0)
393 return s->system_journal;
394
4a0b58c4 395 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
396 if (f)
397 return f;
398
de0671ee
ZJS
399 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
400 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
401 return s->system_journal;
402
43cf8388 403 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 404 /* Too many open? Then let's close one */
43cf8388 405 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 406 assert(f);
69a3a6fd 407 (void) journal_file_close(f);
d025f1e4
ZJS
408 }
409
089ed40b 410 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
d025f1e4
ZJS
411 if (r < 0)
412 return s->system_journal;
413
5c3bde3f 414 server_add_acls(f, uid);
d025f1e4 415
4a0b58c4 416 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 417 if (r < 0) {
69a3a6fd 418 (void) journal_file_close(f);
d025f1e4
ZJS
419 return s->system_journal;
420 }
421
422 return f;
423}
424
ea69bd41
LP
425static int do_rotate(
426 Server *s,
427 JournalFile **f,
428 const char* name,
429 bool seal,
430 uint32_t uid) {
431
fc55baee
ZJS
432 int r;
433 assert(s);
434
435 if (!*f)
436 return -EINVAL;
437
b58c888f 438 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
439 if (r < 0)
440 if (*f)
ea69bd41 441 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 442 else
ea69bd41 443 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 444 else
5c3bde3f 445 server_add_acls(*f, uid);
2678031a 446
fc55baee
ZJS
447 return r;
448}
449
d025f1e4
ZJS
450void server_rotate(Server *s) {
451 JournalFile *f;
452 void *k;
453 Iterator i;
454 int r;
455
456 log_debug("Rotating...");
457
8580d1f7
LP
458 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
459 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 460
43cf8388 461 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 462 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 463 if (r >= 0)
43cf8388 464 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
465 else if (!f)
466 /* Old file has been closed and deallocated */
43cf8388 467 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 468 }
b58c888f
VC
469
470 /* Perform any deferred closes which aren't still offlining. */
471 SET_FOREACH(f, s->deferred_closes, i)
472 if (!journal_file_is_offlining(f)) {
473 (void) set_remove(s->deferred_closes, f);
474 (void) journal_file_close(f);
475 }
d025f1e4
ZJS
476}
477
26687bf8
OS
478void server_sync(Server *s) {
479 JournalFile *f;
26687bf8
OS
480 Iterator i;
481 int r;
482
26687bf8 483 if (s->system_journal) {
ac2e41f5 484 r = journal_file_set_offline(s->system_journal, false);
26687bf8 485 if (r < 0)
65089b82 486 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
487 }
488
65c1d46b 489 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 490 r = journal_file_set_offline(f, false);
26687bf8 491 if (r < 0)
65089b82 492 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
493 }
494
f9a810be
LP
495 if (s->sync_event_source) {
496 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
497 if (r < 0)
da927ba9 498 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 499 }
26687bf8
OS
500
501 s->sync_scheduled = false;
502}
503
ea69bd41
LP
504static void do_vacuum(
505 Server *s,
ea69bd41 506 JournalFile *f,
8580d1f7
LP
507 JournalMetrics *metrics,
508 const char *path,
509 const char *name,
510 bool verbose,
511 bool patch_min_use) {
ea69bd41
LP
512
513 const char *p;
8580d1f7 514 uint64_t limit;
63c8666b
ZJS
515 int r;
516
8580d1f7
LP
517 assert(s);
518 assert(metrics);
519 assert(path);
520 assert(name);
521
63c8666b
ZJS
522 if (!f)
523 return;
524
8580d1f7
LP
525 p = strjoina(path, SERVER_MACHINE_ID(s));
526
527 limit = metrics->max_use;
528 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
529
530 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 531 if (r < 0 && r != -ENOENT)
8580d1f7 532 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
63c8666b
ZJS
533}
534
8580d1f7
LP
535int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
536 assert(s);
d025f1e4
ZJS
537
538 log_debug("Vacuuming...");
539
540 s->oldest_file_usec = 0;
541
8580d1f7
LP
542 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
543 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
d025f1e4 544
8580d1f7
LP
545 s->cached_space_limit = 0;
546 s->cached_space_available = 0;
547 s->cached_space_timestamp = 0;
d025f1e4 548
8580d1f7 549 return 0;
d025f1e4
ZJS
550}
551
0c24bb23
LP
552static void server_cache_machine_id(Server *s) {
553 sd_id128_t id;
554 int r;
555
556 assert(s);
557
558 r = sd_id128_get_machine(&id);
559 if (r < 0)
560 return;
561
562 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
563}
564
565static void server_cache_boot_id(Server *s) {
566 sd_id128_t id;
567 int r;
568
569 assert(s);
570
571 r = sd_id128_get_boot(&id);
572 if (r < 0)
573 return;
574
575 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
576}
577
578static void server_cache_hostname(Server *s) {
579 _cleanup_free_ char *t = NULL;
580 char *x;
581
582 assert(s);
583
584 t = gethostname_malloc();
585 if (!t)
586 return;
587
588 x = strappend("_HOSTNAME=", t);
589 if (!x)
590 return;
591
592 free(s->hostname_field);
593 s->hostname_field = x;
594}
595
8531ae70 596static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5
ZJS
597 switch(r) {
598 case -E2BIG: /* Hit configured limit */
599 case -EFBIG: /* Hit fs limit */
600 case -EDQUOT: /* Quota limit hit */
601 case -ENOSPC: /* Disk full */
d025f1e4 602 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5
ZJS
603 return true;
604 case -EIO: /* I/O error of some kind (mmap) */
605 log_warning("%s: IO error, rotating.", f->path);
606 return true;
607 case -EHOSTDOWN: /* Other machine */
d025f1e4 608 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5
ZJS
609 return true;
610 case -EBUSY: /* Unclean shutdown */
d025f1e4 611 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5
ZJS
612 return true;
613 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 614 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5
ZJS
615 return true;
616 case -EBADMSG: /* Corrupted */
617 case -ENODATA: /* Truncated */
618 case -ESHUTDOWN: /* Already archived */
d025f1e4 619 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5
ZJS
620 return true;
621 case -EIDRM: /* Journal file has been deleted */
2678031a 622 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5
ZJS
623 return true;
624 default:
d025f1e4 625 return false;
6e1045e5 626 }
d025f1e4
ZJS
627}
628
d07f7b9e 629static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
0f972d66 630 struct dual_timestamp ts;
d025f1e4 631 bool vacuumed = false;
0f972d66 632 JournalFile *f;
d025f1e4
ZJS
633 int r;
634
635 assert(s);
636 assert(iovec);
637 assert(n > 0);
638
0f972d66
LP
639 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
640 * the source time, and not even the time the event was originally seen, but instead simply the time we started
641 * processing it, as we want strictly linear ordering in what we write out.) */
642 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
643 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
644
d025f1e4
ZJS
645 f = find_journal(s, uid);
646 if (!f)
647 return;
648
649 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
650 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
651 server_rotate(s);
8580d1f7 652 server_vacuum(s, false, false);
d025f1e4
ZJS
653 vacuumed = true;
654
655 f = find_journal(s, uid);
656 if (!f)
657 return;
658 }
659
0f972d66 660 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 661 if (r >= 0) {
d07f7b9e 662 server_schedule_sync(s, priority);
d025f1e4 663 return;
26687bf8 664 }
d025f1e4
ZJS
665
666 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 667 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
668 return;
669 }
670
671 server_rotate(s);
8580d1f7 672 server_vacuum(s, false, false);
d025f1e4
ZJS
673
674 f = find_journal(s, uid);
675 if (!f)
676 return;
677
678 log_debug("Retrying write.");
0f972d66 679 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
680 if (r < 0)
681 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
682 else
d07f7b9e 683 server_schedule_sync(s, priority);
d025f1e4
ZJS
684}
685
4b58153d
LP
686static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
687 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
688 char *copy, ids[SD_ID128_STRING_MAX];
689 int r;
690
691 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
692 * on the cgroup path. */
693
694 r = cg_slice_to_path(slice, &slice_path);
695 if (r < 0)
696 return r;
697
698 escaped = cg_escape(unit);
699 if (!escaped)
700 return -ENOMEM;
701
702 p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
703 if (!p)
704 return -ENOMEM;
705
706 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
707 if (r < 0)
708 return r;
709 if (r != 32)
710 return -EINVAL;
711 ids[32] = 0;
712
713 if (!id128_is_valid(ids))
714 return -EINVAL;
715
716 copy = strdup(ids);
717 if (!copy)
718 return -ENOMEM;
719
720 *ret = copy;
721 return 0;
722}
723
d025f1e4
ZJS
724static void dispatch_message_real(
725 Server *s,
726 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
727 const struct ucred *ucred,
728 const struct timeval *tv,
d025f1e4 729 const char *label, size_t label_len,
968f3196 730 const char *unit_id,
d07f7b9e 731 int priority,
968f3196 732 pid_t object_pid) {
d025f1e4 733
968f3196 734 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
735 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
736 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
737 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 738 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
739 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
740 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
741 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
742 uid_t object_uid;
743 gid_t object_gid;
968f3196 744 char *x;
d025f1e4 745 int r;
ae018d9b 746 char *t, *c;
82499507
LP
747 uid_t realuid = 0, owner = 0, journal_uid;
748 bool owner_valid = false;
ae018d9b 749#ifdef HAVE_AUDIT
968f3196
ZJS
750 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
751 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
752 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
753 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
754
755 uint32_t audit;
756 uid_t loginuid;
757#endif
d025f1e4
ZJS
758
759 assert(s);
760 assert(iovec);
761 assert(n > 0);
d473176a 762 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
763
764 if (ucred) {
d025f1e4
ZJS
765 realuid = ucred->uid;
766
de0671ee 767 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 768 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 769
de0671ee 770 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 771 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 772
de0671ee 773 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 774 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
775
776 r = get_process_comm(ucred->pid, &t);
777 if (r >= 0) {
63c372cb 778 x = strjoina("_COMM=", t);
d025f1e4 779 free(t);
968f3196 780 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
781 }
782
783 r = get_process_exe(ucred->pid, &t);
784 if (r >= 0) {
63c372cb 785 x = strjoina("_EXE=", t);
d025f1e4 786 free(t);
968f3196 787 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
788 }
789
9bdbc2e2 790 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 791 if (r >= 0) {
63c372cb 792 x = strjoina("_CMDLINE=", t);
d025f1e4 793 free(t);
3a832116
SL
794 IOVEC_SET_STRING(iovec[n++], x);
795 }
796
797 r = get_process_capeff(ucred->pid, &t);
798 if (r >= 0) {
63c372cb 799 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 800 free(t);
968f3196 801 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
802 }
803
0a20e3c1 804#ifdef HAVE_AUDIT
d025f1e4 805 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 806 if (r >= 0) {
de0671ee 807 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
808 IOVEC_SET_STRING(iovec[n++], audit_session);
809 }
d025f1e4
ZJS
810
811 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 812 if (r >= 0) {
de0671ee 813 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 814 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 815 }
ae018d9b 816#endif
d025f1e4 817
e9174f29 818 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 819 if (r >= 0) {
4b58153d 820 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
968f3196
ZJS
821 char *session = NULL;
822
63c372cb 823 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 824 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 825
ae018d9b
LP
826 r = cg_path_get_session(c, &t);
827 if (r >= 0) {
63c372cb 828 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 829 free(t);
d025f1e4 830 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
831 }
832
833 if (cg_path_get_owner_uid(c, &owner) >= 0) {
834 owner_valid = true;
d025f1e4 835
de0671ee 836 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 837 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 838 }
d025f1e4 839
4b58153d
LP
840 if (cg_path_get_unit(c, &raw_unit) >= 0) {
841 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
19cace37
LP
842 IOVEC_SET_STRING(iovec[n++], x);
843 } else if (unit_id && !session) {
63c372cb 844 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
845 IOVEC_SET_STRING(iovec[n++], x);
846 }
847
848 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 849 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 850 free(t);
968f3196 851 IOVEC_SET_STRING(iovec[n++], x);
19cace37 852 } else if (unit_id && session) {
63c372cb 853 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
854 IOVEC_SET_STRING(iovec[n++], x);
855 }
ae018d9b 856
4b58153d
LP
857 if (cg_path_get_slice(c, &raw_slice) >= 0) {
858 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
0a244b8e
LP
859 IOVEC_SET_STRING(iovec[n++], x);
860 }
861
d473176a
LP
862 if (cg_path_get_user_slice(c, &t) >= 0) {
863 x = strjoina("_SYSTEMD_USER_SLICE=", t);
864 free(t);
865 IOVEC_SET_STRING(iovec[n++], x);
866 }
867
4b58153d
LP
868 if (raw_slice && raw_unit) {
869 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
870 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
871 free(t);
872 IOVEC_SET_STRING(iovec[n++], x);
873 }
874 }
875
ae018d9b 876 free(c);
2d43b190 877 } else if (unit_id) {
63c372cb 878 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 879 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 880 }
d025f1e4 881
d025f1e4 882#ifdef HAVE_SELINUX
6355e756 883 if (mac_selinux_have()) {
d682b3a7 884 if (label) {
f8294e41 885 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 886
d682b3a7
LP
887 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
888 IOVEC_SET_STRING(iovec[n++], x);
889 } else {
2ed96880 890 char *con;
d025f1e4 891
d682b3a7 892 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 893 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 894
d682b3a7
LP
895 freecon(con);
896 IOVEC_SET_STRING(iovec[n++], x);
897 }
d025f1e4
ZJS
898 }
899 }
900#endif
901 }
968f3196
ZJS
902 assert(n <= m);
903
904 if (object_pid) {
905 r = get_process_uid(object_pid, &object_uid);
906 if (r >= 0) {
de0671ee 907 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
908 IOVEC_SET_STRING(iovec[n++], o_uid);
909 }
910
911 r = get_process_gid(object_pid, &object_gid);
912 if (r >= 0) {
de0671ee 913 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
914 IOVEC_SET_STRING(iovec[n++], o_gid);
915 }
916
917 r = get_process_comm(object_pid, &t);
918 if (r >= 0) {
63c372cb 919 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
920 free(t);
921 IOVEC_SET_STRING(iovec[n++], x);
922 }
923
924 r = get_process_exe(object_pid, &t);
925 if (r >= 0) {
63c372cb 926 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
927 free(t);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
930
931 r = get_process_cmdline(object_pid, 0, false, &t);
932 if (r >= 0) {
63c372cb 933 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
934 free(t);
935 IOVEC_SET_STRING(iovec[n++], x);
936 }
937
938#ifdef HAVE_AUDIT
939 r = audit_session_from_pid(object_pid, &audit);
940 if (r >= 0) {
de0671ee 941 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
942 IOVEC_SET_STRING(iovec[n++], o_audit_session);
943 }
944
945 r = audit_loginuid_from_pid(object_pid, &loginuid);
946 if (r >= 0) {
de0671ee 947 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
948 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
949 }
950#endif
951
e9174f29 952 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 953 if (r >= 0) {
63c372cb 954 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
955 IOVEC_SET_STRING(iovec[n++], x);
956
957 r = cg_path_get_session(c, &t);
958 if (r >= 0) {
63c372cb 959 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
960 free(t);
961 IOVEC_SET_STRING(iovec[n++], x);
962 }
963
964 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 965 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
966 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
967 }
968
969 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 970 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 971 free(t);
19cace37
LP
972 IOVEC_SET_STRING(iovec[n++], x);
973 }
974
975 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 976 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 977 free(t);
968f3196 978 IOVEC_SET_STRING(iovec[n++], x);
19cace37 979 }
968f3196 980
d473176a
LP
981 if (cg_path_get_slice(c, &t) >= 0) {
982 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
983 free(t);
984 IOVEC_SET_STRING(iovec[n++], x);
985 }
986
987 if (cg_path_get_user_slice(c, &t) >= 0) {
988 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
989 free(t);
990 IOVEC_SET_STRING(iovec[n++], x);
991 }
992
968f3196
ZJS
993 free(c);
994 }
995 }
996 assert(n <= m);
d025f1e4
ZJS
997
998 if (tv) {
398a50cd 999 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
a5693989 1000 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
1001 }
1002
1003 /* Note that strictly speaking storing the boot id here is
1004 * redundant since the entry includes this in-line
1005 * anyway. However, we need this indexed, too. */
0c24bb23
LP
1006 if (!isempty(s->boot_id_field))
1007 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 1008
0c24bb23
LP
1009 if (!isempty(s->machine_id_field))
1010 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 1011
0c24bb23
LP
1012 if (!isempty(s->hostname_field))
1013 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
1014
1015 assert(n <= m);
1016
da499392 1017 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 1018 /* Split up strictly by any UID */
759c945a 1019 journal_uid = realuid;
82499507 1020 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
1021 /* Split up by login UIDs. We do this only if the
1022 * realuid is not root, in order not to accidentally
1023 * leak privileged information to the user that is
1024 * logged by a privileged process that is part of an
7517e174 1025 * unprivileged session. */
8a0889df 1026 journal_uid = owner;
da499392
KS
1027 else
1028 journal_uid = 0;
759c945a 1029
d07f7b9e 1030 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
1031}
1032
1033void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1034 char mid[11 + 32 + 1];
8a03c9ef
ZJS
1035 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1036 unsigned n = 0, m;
32917e33 1037 int r;
d025f1e4 1038 va_list ap;
b92bea5d 1039 struct ucred ucred = {};
d025f1e4
ZJS
1040
1041 assert(s);
1042 assert(format);
1043
4850d39a 1044 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
1045 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1046 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1047
d025f1e4 1048 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 1049 assert_cc(6 == LOG_INFO);
32917e33 1050 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4 1051
3bbaff3e 1052 if (!sd_id128_is_null(message_id)) {
e2cc6eca 1053 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
1054 IOVEC_SET_STRING(iovec[n++], mid);
1055 }
1056
8a03c9ef
ZJS
1057 m = n;
1058
1059 va_start(ap, format);
32917e33
ZJS
1060 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1061 /* Error handling below */
8a03c9ef
ZJS
1062 va_end(ap);
1063
d025f1e4
ZJS
1064 ucred.pid = getpid();
1065 ucred.uid = getuid();
1066 ucred.gid = getgid();
1067
32917e33
ZJS
1068 if (r >= 0)
1069 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
1070
1071 while (m < n)
1072 free(iovec[m++].iov_base);
32917e33
ZJS
1073
1074 if (r < 0) {
1075 /* We failed to format the message. Emit a warning instead. */
1076 char buf[LINE_MAX];
1077
1078 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1079
1080 n = 3;
1081 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1082 IOVEC_SET_STRING(iovec[n++], buf);
1083 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1084 }
d025f1e4
ZJS
1085}
1086
1087void server_dispatch_message(
1088 Server *s,
1089 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
1090 const struct ucred *ucred,
1091 const struct timeval *tv,
d025f1e4
ZJS
1092 const char *label, size_t label_len,
1093 const char *unit_id,
968f3196
ZJS
1094 int priority,
1095 pid_t object_pid) {
d025f1e4 1096
7027ff61 1097 int rl, r;
7fd1b19b 1098 _cleanup_free_ char *path = NULL;
8580d1f7 1099 uint64_t available = 0;
db91ea32 1100 char *c;
d025f1e4
ZJS
1101
1102 assert(s);
1103 assert(iovec || n == 0);
1104
1105 if (n == 0)
1106 return;
1107
1108 if (LOG_PRI(priority) > s->max_level_store)
1109 return;
1110
2f5df74a
HHPF
1111 /* Stop early in case the information will not be stored
1112 * in a journal. */
1113 if (s->storage == STORAGE_NONE)
1114 return;
1115
d025f1e4
ZJS
1116 if (!ucred)
1117 goto finish;
1118
e9174f29 1119 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 1120 if (r < 0)
d025f1e4
ZJS
1121 goto finish;
1122
1123 /* example: /user/lennart/3/foobar
1124 * /system/dbus.service/foobar
1125 *
1126 * So let's cut of everything past the third /, since that is
1127 * where user directories start */
1128
1129 c = strchr(path, '/');
1130 if (c) {
1131 c = strchr(c+1, '/');
1132 if (c) {
1133 c = strchr(c+1, '/');
1134 if (c)
1135 *c = 0;
1136 }
1137 }
1138
8580d1f7
LP
1139 (void) determine_space(s, false, false, &available, NULL);
1140 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 1141 if (rl == 0)
d025f1e4 1142 return;
d025f1e4
ZJS
1143
1144 /* Write a suppression message if we suppressed something */
1145 if (rl > 1)
db91ea32 1146 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
1147 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1148 NULL);
d025f1e4
ZJS
1149
1150finish:
d07f7b9e 1151 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
1152}
1153
d025f1e4 1154int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1155 sd_id128_t machine;
1156 sd_journal *j = NULL;
fbb63411
LP
1157 char ts[FORMAT_TIMESPAN_MAX];
1158 usec_t start;
1159 unsigned n = 0;
1160 int r;
d025f1e4
ZJS
1161
1162 assert(s);
1163
1164 if (s->storage != STORAGE_AUTO &&
1165 s->storage != STORAGE_PERSISTENT)
1166 return 0;
1167
1168 if (!s->runtime_journal)
1169 return 0;
1170
8580d1f7 1171 (void) system_journal_open(s, true);
d025f1e4
ZJS
1172
1173 if (!s->system_journal)
1174 return 0;
1175
1176 log_debug("Flushing to /var...");
1177
fbb63411
LP
1178 start = now(CLOCK_MONOTONIC);
1179
d025f1e4 1180 r = sd_id128_get_machine(&machine);
00a16861 1181 if (r < 0)
d025f1e4 1182 return r;
d025f1e4
ZJS
1183
1184 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1185 if (r < 0)
1186 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1187
93b73b06
LP
1188 sd_journal_set_data_threshold(j, 0);
1189
d025f1e4
ZJS
1190 SD_JOURNAL_FOREACH(j) {
1191 Object *o = NULL;
1192 JournalFile *f;
1193
1194 f = j->current_file;
1195 assert(f && f->current_offset > 0);
1196
fbb63411
LP
1197 n++;
1198
d025f1e4
ZJS
1199 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1200 if (r < 0) {
da927ba9 1201 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1202 goto finish;
1203 }
1204
1205 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1206 if (r >= 0)
1207 continue;
1208
1209 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1210 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1211 goto finish;
1212 }
1213
1214 server_rotate(s);
8580d1f7 1215 server_vacuum(s, false, false);
d025f1e4 1216
253f59df
LP
1217 if (!s->system_journal) {
1218 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1219 r = -EIO;
1220 goto finish;
1221 }
1222
d025f1e4
ZJS
1223 log_debug("Retrying write.");
1224 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1225 if (r < 0) {
da927ba9 1226 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1227 goto finish;
1228 }
1229 }
1230
804ae586
LP
1231 r = 0;
1232
d025f1e4
ZJS
1233finish:
1234 journal_file_post_change(s->system_journal);
1235
804ae586 1236 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1237
1238 if (r >= 0)
c6878637 1239 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1240
763c7aa2 1241 sd_journal_close(j);
d025f1e4 1242
8a03c9ef
ZJS
1243 server_driver_message(s, SD_ID128_NULL,
1244 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1245 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1246 n),
1247 NULL);
fbb63411 1248
d025f1e4
ZJS
1249 return r;
1250}
1251
8531ae70 1252int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1253 Server *s = userdata;
a315ac4e
LP
1254 struct ucred *ucred = NULL;
1255 struct timeval *tv = NULL;
1256 struct cmsghdr *cmsg;
1257 char *label = NULL;
1258 size_t label_len = 0, m;
1259 struct iovec iovec;
1260 ssize_t n;
1261 int *fds = NULL, v = 0;
1262 unsigned n_fds = 0;
1263
1264 union {
1265 struct cmsghdr cmsghdr;
1266
1267 /* We use NAME_MAX space for the SELinux label
1268 * here. The kernel currently enforces no
1269 * limit, but according to suggestions from
1270 * the SELinux people this will change and it
1271 * will probably be identical to NAME_MAX. For
1272 * now we use that, but this should be updated
1273 * one day when the final limit is known. */
1274 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1275 CMSG_SPACE(sizeof(struct timeval)) +
1276 CMSG_SPACE(sizeof(int)) + /* fd */
1277 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1278 } control = {};
1279
1280 union sockaddr_union sa = {};
1281
1282 struct msghdr msghdr = {
1283 .msg_iov = &iovec,
1284 .msg_iovlen = 1,
1285 .msg_control = &control,
1286 .msg_controllen = sizeof(control),
1287 .msg_name = &sa,
1288 .msg_namelen = sizeof(sa),
1289 };
f9a810be 1290
d025f1e4 1291 assert(s);
875c2e22 1292 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1293
1294 if (revents != EPOLLIN) {
1295 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1296 return -EIO;
1297 }
1298
a315ac4e
LP
1299 /* Try to get the right size, if we can. (Not all
1300 * sockets support SIOCINQ, hence we just try, but
1301 * don't rely on it. */
1302 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1303
a315ac4e
LP
1304 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1305 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1306 (size_t) LINE_MAX,
1307 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1308
a315ac4e
LP
1309 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1310 return log_oom();
875c2e22 1311
a315ac4e
LP
1312 iovec.iov_base = s->buffer;
1313 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1314
a315ac4e
LP
1315 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1316 if (n < 0) {
1317 if (errno == EINTR || errno == EAGAIN)
1318 return 0;
875c2e22 1319
a315ac4e
LP
1320 return log_error_errno(errno, "recvmsg() failed: %m");
1321 }
875c2e22 1322
a315ac4e
LP
1323 CMSG_FOREACH(cmsg, &msghdr) {
1324
1325 if (cmsg->cmsg_level == SOL_SOCKET &&
1326 cmsg->cmsg_type == SCM_CREDENTIALS &&
1327 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1328 ucred = (struct ucred*) CMSG_DATA(cmsg);
1329 else if (cmsg->cmsg_level == SOL_SOCKET &&
1330 cmsg->cmsg_type == SCM_SECURITY) {
1331 label = (char*) CMSG_DATA(cmsg);
1332 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1333 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1334 cmsg->cmsg_type == SO_TIMESTAMP &&
1335 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1336 tv = (struct timeval*) CMSG_DATA(cmsg);
1337 else if (cmsg->cmsg_level == SOL_SOCKET &&
1338 cmsg->cmsg_type == SCM_RIGHTS) {
1339 fds = (int*) CMSG_DATA(cmsg);
1340 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1341 }
a315ac4e 1342 }
d025f1e4 1343
a315ac4e
LP
1344 /* And a trailing NUL, just in case */
1345 s->buffer[n] = 0;
1346
1347 if (fd == s->syslog_fd) {
1348 if (n > 0 && n_fds == 0)
1349 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1350 else if (n_fds > 0)
1351 log_warning("Got file descriptors via syslog socket. Ignoring.");
1352
1353 } else if (fd == s->native_fd) {
1354 if (n > 0 && n_fds == 0)
1355 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1356 else if (n == 0 && n_fds == 1)
1357 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1358 else if (n_fds > 0)
1359 log_warning("Got too many file descriptors via native socket. Ignoring.");
1360
1361 } else {
1362 assert(fd == s->audit_fd);
1363
1364 if (n > 0 && n_fds == 0)
1365 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1366 else if (n_fds > 0)
1367 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1368 }
a315ac4e
LP
1369
1370 close_many(fds, n_fds);
1371 return 0;
f9a810be 1372}
d025f1e4 1373
f9a810be
LP
1374static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1375 Server *s = userdata;
33d52ab9 1376 int r;
d025f1e4 1377
f9a810be 1378 assert(s);
d025f1e4 1379
94b65516 1380 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1381
929eeb54 1382 (void) server_flush_to_var(s);
f9a810be 1383 server_sync(s);
8580d1f7 1384 server_vacuum(s, false, false);
d025f1e4 1385
33d52ab9
LP
1386 r = touch("/run/systemd/journal/flushed");
1387 if (r < 0)
1388 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1389
f9a810be
LP
1390 return 0;
1391}
d025f1e4 1392
f9a810be
LP
1393static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1394 Server *s = userdata;
33d52ab9 1395 int r;
d025f1e4 1396
f9a810be 1397 assert(s);
d025f1e4 1398
94b65516 1399 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1400 server_rotate(s);
8580d1f7 1401 server_vacuum(s, true, true);
d025f1e4 1402
dbd6e31c 1403 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1404 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1405 if (r < 0)
1406 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1407
f9a810be
LP
1408 return 0;
1409}
d025f1e4 1410
f9a810be
LP
1411static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1412 Server *s = userdata;
d025f1e4 1413
f9a810be 1414 assert(s);
d025f1e4 1415
4daf54a8 1416 log_received_signal(LOG_INFO, si);
d025f1e4 1417
6203e07a 1418 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1419 return 0;
1420}
1421
94b65516
LP
1422static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1423 Server *s = userdata;
33d52ab9 1424 int r;
94b65516
LP
1425
1426 assert(s);
1427
1428 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1429
1430 server_sync(s);
1431
1432 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1433 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1434 if (r < 0)
1435 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1436
1437 return 0;
1438}
1439
f9a810be 1440static int setup_signals(Server *s) {
f9a810be 1441 int r;
d025f1e4
ZJS
1442
1443 assert(s);
1444
94b65516 1445 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1446
151b9b96 1447 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1448 if (r < 0)
1449 return r;
1450
151b9b96 1451 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1452 if (r < 0)
1453 return r;
d025f1e4 1454
151b9b96 1455 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1456 if (r < 0)
1457 return r;
d025f1e4 1458
b374689c
LP
1459 /* Let's process SIGTERM late, so that we flush all queued
1460 * messages to disk before we exit */
1461 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1462 if (r < 0)
1463 return r;
1464
1465 /* When journald is invoked on the terminal (when debugging),
1466 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1467 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1468 if (r < 0)
1469 return r;
d025f1e4 1470
b374689c
LP
1471 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1472 if (r < 0)
1473 return r;
1474
94b65516
LP
1475 /* SIGRTMIN+1 causes an immediate sync. We process this very
1476 * late, so that everything else queued at this point is
1477 * really written to disk. Clients can watch
1478 * /run/systemd/journal/synced with inotify until its mtime
1479 * changes to see when a sync happened. */
1480 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1481 if (r < 0)
1482 return r;
1483
1484 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1485 if (r < 0)
1486 return r;
1487
d025f1e4
ZJS
1488 return 0;
1489}
1490
1491static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1492 _cleanup_free_ char *line = NULL;
d581d9d9 1493 const char *p;
74df0fca 1494 int r;
d025f1e4 1495
74df0fca 1496 r = proc_cmdline(&line);
b5884878 1497 if (r < 0) {
da927ba9 1498 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1499 return 0;
b5884878 1500 }
d025f1e4 1501
d581d9d9 1502 p = line;
9ed794a3 1503 for (;;) {
ff82c36c 1504 _cleanup_free_ char *word = NULL;
d025f1e4 1505
d581d9d9
SS
1506 r = extract_first_word(&p, &word, NULL, 0);
1507 if (r < 0)
1508 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1509
1510 if (r == 0)
1511 break;
d025f1e4
ZJS
1512
1513 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1514 r = parse_boolean(word + 35);
1515 if (r < 0)
1516 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1517 else
1518 s->forward_to_syslog = r;
1519 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1520 r = parse_boolean(word + 33);
1521 if (r < 0)
1522 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1523 else
1524 s->forward_to_kmsg = r;
1525 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1526 r = parse_boolean(word + 36);
1527 if (r < 0)
1528 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1529 else
1530 s->forward_to_console = r;
40b71e89
ST
1531 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1532 r = parse_boolean(word + 33);
1533 if (r < 0)
1534 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1535 else
1536 s->forward_to_wall = r;
d025f1e4
ZJS
1537 } else if (startswith(word, "systemd.journald"))
1538 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1539 }
1540
804ae586 1541 /* do not warn about state here, since probably systemd already did */
db91ea32 1542 return 0;
d025f1e4
ZJS
1543}
1544
1545static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1546 assert(s);
1547
43688c49 1548 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
75eb6154 1549 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1550 "Journal\0",
1551 config_item_perf_lookup, journald_gperf_lookup,
1552 false, s);
d025f1e4
ZJS
1553}
1554
f9a810be
LP
1555static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1556 Server *s = userdata;
26687bf8
OS
1557
1558 assert(s);
1559
f9a810be 1560 server_sync(s);
26687bf8
OS
1561 return 0;
1562}
1563
d07f7b9e 1564int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1565 int r;
1566
26687bf8
OS
1567 assert(s);
1568
d07f7b9e
LP
1569 if (priority <= LOG_CRIT) {
1570 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1571 server_sync(s);
1572 return 0;
1573 }
1574
26687bf8
OS
1575 if (s->sync_scheduled)
1576 return 0;
1577
f9a810be
LP
1578 if (s->sync_interval_usec > 0) {
1579 usec_t when;
ca267016 1580
6a0f1f6d 1581 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1582 if (r < 0)
1583 return r;
26687bf8 1584
f9a810be
LP
1585 when += s->sync_interval_usec;
1586
1587 if (!s->sync_event_source) {
6a0f1f6d
LP
1588 r = sd_event_add_time(
1589 s->event,
1590 &s->sync_event_source,
1591 CLOCK_MONOTONIC,
1592 when, 0,
1593 server_dispatch_sync, s);
f9a810be
LP
1594 if (r < 0)
1595 return r;
1596
1597 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1598 } else {
1599 r = sd_event_source_set_time(s->sync_event_source, when);
1600 if (r < 0)
1601 return r;
1602
1603 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1604 }
26687bf8 1605 if (r < 0)
f9a810be 1606 return r;
26687bf8 1607
f9a810be
LP
1608 s->sync_scheduled = true;
1609 }
26687bf8
OS
1610
1611 return 0;
1612}
1613
0c24bb23
LP
1614static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1615 Server *s = userdata;
1616
1617 assert(s);
1618
1619 server_cache_hostname(s);
1620 return 0;
1621}
1622
1623static int server_open_hostname(Server *s) {
1624 int r;
1625
1626 assert(s);
1627
1628 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1629 if (s->hostname_fd < 0)
1630 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1631
151b9b96 1632 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1633 if (r < 0) {
28def94c
DR
1634 /* kernels prior to 3.2 don't support polling this file. Ignore
1635 * the failure. */
1636 if (r == -EPERM) {
e53fc357 1637 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1638 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1639 return 0;
1640 }
1641
23bbb0de 1642 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1643 }
1644
1645 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1646 if (r < 0)
1647 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1648
1649 return 0;
1650}
1651
e22aa3d3
LP
1652static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1653 Server *s = userdata;
1654 int r;
1655
1656 assert(s);
1657 assert(s->notify_event_source == es);
1658 assert(s->notify_fd == fd);
1659
e22aa3d3 1660 /* The $NOTIFY_SOCKET is writable again, now send exactly one
dd835265 1661 * message on it. Either it's the watchdog event, the initial
119e9655
LP
1662 * READY=1 event or an stdout stream event. If there's nothing
1663 * to write anymore, turn our event source off. The next time
1664 * there's something to send it will be turned on again. */
e22aa3d3
LP
1665
1666 if (!s->sent_notify_ready) {
1667 static const char p[] =
1668 "READY=1\n"
1669 "STATUS=Processing requests...";
1670 ssize_t l;
1671
1672 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1673 if (l < 0) {
1674 if (errno == EAGAIN)
1675 return 0;
1676
1677 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1678 }
1679
1680 s->sent_notify_ready = true;
1681 log_debug("Sent READY=1 notification.");
1682
119e9655
LP
1683 } else if (s->send_watchdog) {
1684
1685 static const char p[] =
1686 "WATCHDOG=1";
1687
1688 ssize_t l;
1689
1690 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1691 if (l < 0) {
1692 if (errno == EAGAIN)
1693 return 0;
1694
1695 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1696 }
1697
1698 s->send_watchdog = false;
1699 log_debug("Sent WATCHDOG=1 notification.");
1700
e22aa3d3
LP
1701 } else if (s->stdout_streams_notify_queue)
1702 /* Dispatch one stream notification event */
1703 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1704
61233823 1705 /* Leave us enabled if there's still more to do. */
119e9655 1706 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1707 return 0;
1708
1709 /* There was nothing to do anymore, let's turn ourselves off. */
1710 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1711 if (r < 0)
1712 return log_error_errno(r, "Failed to turn off notify event source: %m");
1713
1714 return 0;
1715}
1716
119e9655
LP
1717static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1718 Server *s = userdata;
1719 int r;
1720
1721 assert(s);
1722
1723 s->send_watchdog = true;
1724
1725 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1726 if (r < 0)
1727 log_warning_errno(r, "Failed to turn on notify event source: %m");
1728
1729 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1730 if (r < 0)
1731 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1732
1733 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1734 if (r < 0)
1735 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1736
1737 return 0;
1738}
1739
e22aa3d3
LP
1740static int server_connect_notify(Server *s) {
1741 union sockaddr_union sa = {
1742 .un.sun_family = AF_UNIX,
1743 };
1744 const char *e;
1745 int r;
1746
1747 assert(s);
1748 assert(s->notify_fd < 0);
1749 assert(!s->notify_event_source);
1750
1751 /*
1752 So here's the problem: we'd like to send notification
1753 messages to PID 1, but we cannot do that via sd_notify(),
1754 since that's synchronous, and we might end up blocking on
1755 it. Specifically: given that PID 1 might block on
1756 dbus-daemon during IPC, and dbus-daemon is logging to us,
1757 and might hence block on us, we might end up in a deadlock
ccddd104 1758 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1759 generating a full blocking circle. To avoid this, let's
1760 create a non-blocking socket, and connect it to the
1761 notification socket, and then wait for POLLOUT before we
1762 send anything. This should efficiently avoid any deadlocks,
1763 as we'll never block on PID 1, hence PID 1 can safely block
1764 on dbus-daemon which can safely block on us again.
1765
1766 Don't think that this issue is real? It is, see:
1767 https://github.com/systemd/systemd/issues/1505
1768 */
1769
1770 e = getenv("NOTIFY_SOCKET");
1771 if (!e)
1772 return 0;
1773
1774 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1775 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1776 return -EINVAL;
1777 }
1778
1779 if (strlen(e) > sizeof(sa.un.sun_path)) {
1780 log_error("NOTIFY_SOCKET path too long: %s", e);
1781 return -EINVAL;
1782 }
1783
1784 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1785 if (s->notify_fd < 0)
1786 return log_error_errno(errno, "Failed to create notify socket: %m");
1787
1788 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1789
1790 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1791 if (sa.un.sun_path[0] == '@')
1792 sa.un.sun_path[0] = 0;
1793
fc2fffe7 1794 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
e22aa3d3
LP
1795 if (r < 0)
1796 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1797
1798 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1799 if (r < 0)
1800 return log_error_errno(r, "Failed to watch notification socket: %m");
1801
119e9655
LP
1802 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1803 s->send_watchdog = true;
1804
4de2402b 1805 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1806 if (r < 0)
1807 return log_error_errno(r, "Failed to add watchdog time event: %m");
1808 }
1809
e22aa3d3
LP
1810 /* This should fire pretty soon, which we'll use to send the
1811 * READY=1 event. */
1812
1813 return 0;
1814}
1815
d025f1e4 1816int server_init(Server *s) {
13790add 1817 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1818 int n, r, fd;
7d18d348 1819 bool no_sockets;
d025f1e4
ZJS
1820
1821 assert(s);
1822
1823 zero(*s);
e22aa3d3 1824 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1825 s->compress = true;
1826 s->seal = true;
1827
119e9655
LP
1828 s->watchdog_usec = USEC_INFINITY;
1829
26687bf8
OS
1830 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1831 s->sync_scheduled = false;
1832
d025f1e4
ZJS
1833 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1834 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1835
40b71e89 1836 s->forward_to_wall = true;
d025f1e4 1837
e150e820
MB
1838 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1839
d025f1e4
ZJS
1840 s->max_level_store = LOG_DEBUG;
1841 s->max_level_syslog = LOG_DEBUG;
1842 s->max_level_kmsg = LOG_NOTICE;
1843 s->max_level_console = LOG_INFO;
40b71e89 1844 s->max_level_wall = LOG_EMERG;
d025f1e4 1845
8580d1f7
LP
1846 journal_reset_metrics(&s->system_metrics);
1847 journal_reset_metrics(&s->runtime_metrics);
d025f1e4
ZJS
1848
1849 server_parse_config_file(s);
1850 server_parse_proc_cmdline(s);
8580d1f7 1851
d288f79f 1852 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1853 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1854 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1855 s->rate_limit_interval = s->rate_limit_burst = 0;
1856 }
d025f1e4 1857
8580d1f7 1858 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1859
43cf8388 1860 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1861 if (!s->user_journals)
1862 return log_oom();
1863
1864 s->mmap = mmap_cache_new();
1865 if (!s->mmap)
1866 return log_oom();
1867
b58c888f
VC
1868 s->deferred_closes = set_new(NULL);
1869 if (!s->deferred_closes)
1870 return log_oom();
1871
f9a810be 1872 r = sd_event_default(&s->event);
23bbb0de
MS
1873 if (r < 0)
1874 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1875
1876 n = sd_listen_fds(true);
23bbb0de
MS
1877 if (n < 0)
1878 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1879
1880 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1881
1882 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1883
1884 if (s->native_fd >= 0) {
1885 log_error("Too many native sockets passed.");
1886 return -EINVAL;
1887 }
1888
1889 s->native_fd = fd;
1890
1891 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1892
1893 if (s->stdout_fd >= 0) {
1894 log_error("Too many stdout sockets passed.");
1895 return -EINVAL;
1896 }
1897
1898 s->stdout_fd = fd;
1899
03ee5c38
LP
1900 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1901 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1902
1903 if (s->syslog_fd >= 0) {
1904 log_error("Too many /dev/log sockets passed.");
1905 return -EINVAL;
1906 }
1907
1908 s->syslog_fd = fd;
1909
875c2e22
LP
1910 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1911
1912 if (s->audit_fd >= 0) {
1913 log_error("Too many audit sockets passed.");
1914 return -EINVAL;
1915 }
1916
1917 s->audit_fd = fd;
1918
4ec3cd73 1919 } else {
4ec3cd73 1920
13790add
LP
1921 if (!fds) {
1922 fds = fdset_new();
1923 if (!fds)
1924 return log_oom();
1925 }
4ec3cd73 1926
13790add
LP
1927 r = fdset_put(fds, fd);
1928 if (r < 0)
1929 return log_oom();
4ec3cd73 1930 }
d025f1e4
ZJS
1931 }
1932
15d91bff
ZJS
1933 /* Try to restore streams, but don't bother if this fails */
1934 (void) server_restore_streams(s, fds);
d025f1e4 1935
13790add
LP
1936 if (fdset_size(fds) > 0) {
1937 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1938 fds = fdset_free(fds);
1939 }
1940
7d18d348
ZJS
1941 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1942
1943 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1944
1945 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1946 r = server_open_stdout_socket(s);
1947 if (r < 0)
1948 return r;
1949
37b7affe 1950 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1951 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1952 if (r < 0)
1953 return r;
1954
37b7affe 1955 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1956 r = server_open_native_socket(s);
d025f1e4
ZJS
1957 if (r < 0)
1958 return r;
1959
37b7affe 1960 /* /dev/kmsg */
d025f1e4
ZJS
1961 r = server_open_dev_kmsg(s);
1962 if (r < 0)
1963 return r;
1964
7d18d348
ZJS
1965 /* Unless we got *some* sockets and not audit, open audit socket */
1966 if (s->audit_fd >= 0 || no_sockets) {
1967 r = server_open_audit(s);
1968 if (r < 0)
1969 return r;
1970 }
875c2e22 1971
d025f1e4
ZJS
1972 r = server_open_kernel_seqnum(s);
1973 if (r < 0)
1974 return r;
1975
0c24bb23
LP
1976 r = server_open_hostname(s);
1977 if (r < 0)
1978 return r;
1979
f9a810be 1980 r = setup_signals(s);
d025f1e4
ZJS
1981 if (r < 0)
1982 return r;
1983
1984 s->udev = udev_new();
1985 if (!s->udev)
1986 return -ENOMEM;
1987
f9a810be 1988 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1989 if (!s->rate_limit)
1990 return -ENOMEM;
1991
e9174f29
LP
1992 r = cg_get_root_path(&s->cgroup_root);
1993 if (r < 0)
1994 return r;
1995
0c24bb23
LP
1996 server_cache_hostname(s);
1997 server_cache_boot_id(s);
1998 server_cache_machine_id(s);
1999
e22aa3d3
LP
2000 (void) server_connect_notify(s);
2001
804ae586 2002 return system_journal_open(s, false);
d025f1e4
ZJS
2003}
2004
2005void server_maybe_append_tags(Server *s) {
2006#ifdef HAVE_GCRYPT
2007 JournalFile *f;
2008 Iterator i;
2009 usec_t n;
2010
2011 n = now(CLOCK_REALTIME);
2012
2013 if (s->system_journal)
2014 journal_file_maybe_append_tag(s->system_journal, n);
2015
43cf8388 2016 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
2017 journal_file_maybe_append_tag(f, n);
2018#endif
2019}
2020
2021void server_done(Server *s) {
2022 JournalFile *f;
2023 assert(s);
2024
b58c888f
VC
2025 if (s->deferred_closes) {
2026 journal_file_close_set(s->deferred_closes);
2027 set_free(s->deferred_closes);
2028 }
2029
d025f1e4
ZJS
2030 while (s->stdout_streams)
2031 stdout_stream_free(s->stdout_streams);
2032
2033 if (s->system_journal)
69a3a6fd 2034 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
2035
2036 if (s->runtime_journal)
69a3a6fd 2037 (void) journal_file_close(s->runtime_journal);
d025f1e4 2038
43cf8388 2039 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 2040 (void) journal_file_close(f);
d025f1e4 2041
43cf8388 2042 ordered_hashmap_free(s->user_journals);
d025f1e4 2043
f9a810be
LP
2044 sd_event_source_unref(s->syslog_event_source);
2045 sd_event_source_unref(s->native_event_source);
2046 sd_event_source_unref(s->stdout_event_source);
2047 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 2048 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
2049 sd_event_source_unref(s->sync_event_source);
2050 sd_event_source_unref(s->sigusr1_event_source);
2051 sd_event_source_unref(s->sigusr2_event_source);
2052 sd_event_source_unref(s->sigterm_event_source);
2053 sd_event_source_unref(s->sigint_event_source);
94b65516 2054 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 2055 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 2056 sd_event_source_unref(s->notify_event_source);
119e9655 2057 sd_event_source_unref(s->watchdog_event_source);
f9a810be 2058 sd_event_unref(s->event);
d025f1e4 2059
03e334a1
LP
2060 safe_close(s->syslog_fd);
2061 safe_close(s->native_fd);
2062 safe_close(s->stdout_fd);
2063 safe_close(s->dev_kmsg_fd);
875c2e22 2064 safe_close(s->audit_fd);
03e334a1 2065 safe_close(s->hostname_fd);
e22aa3d3 2066 safe_close(s->notify_fd);
0c24bb23 2067
d025f1e4
ZJS
2068 if (s->rate_limit)
2069 journal_rate_limit_free(s->rate_limit);
2070
2071 if (s->kernel_seqnum)
2072 munmap(s->kernel_seqnum, sizeof(uint64_t));
2073
2074 free(s->buffer);
2075 free(s->tty_path);
e9174f29 2076 free(s->cgroup_root);
99d0966e 2077 free(s->hostname_field);
d025f1e4
ZJS
2078
2079 if (s->mmap)
2080 mmap_cache_unref(s->mmap);
2081
3e044c49 2082 udev_unref(s->udev);
d025f1e4 2083}
8580d1f7
LP
2084
2085static const char* const storage_table[_STORAGE_MAX] = {
2086 [STORAGE_AUTO] = "auto",
2087 [STORAGE_VOLATILE] = "volatile",
2088 [STORAGE_PERSISTENT] = "persistent",
2089 [STORAGE_NONE] = "none"
2090};
2091
2092DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2093DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2094
2095static const char* const split_mode_table[_SPLIT_MAX] = {
2096 [SPLIT_LOGIN] = "login",
2097 [SPLIT_UID] = "uid",
2098 [SPLIT_NONE] = "none",
2099};
2100
2101DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2102DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");