]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
sd-bus: use IN_SET
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
afc5dbf3 47#include "io-util.h"
8580d1f7
LP
48#include "journal-authenticate.h"
49#include "journal-file.h"
d025f1e4
ZJS
50#include "journal-internal.h"
51#include "journal-vacuum.h"
8580d1f7 52#include "journald-audit.h"
d025f1e4 53#include "journald-kmsg.h"
d025f1e4 54#include "journald-native.h"
8580d1f7 55#include "journald-rate-limit.h"
3ffd4af2 56#include "journald-server.h"
8580d1f7
LP
57#include "journald-stream.h"
58#include "journald-syslog.h"
07630cea
LP
59#include "missing.h"
60#include "mkdir.h"
6bedfcbb 61#include "parse-util.h"
4e731273 62#include "proc-cmdline.h"
07630cea
LP
63#include "process-util.h"
64#include "rm-rf.h"
65#include "selinux-util.h"
66#include "signal-util.h"
67#include "socket-util.h"
32917e33 68#include "stdio-util.h"
8b43440b 69#include "string-table.h"
07630cea 70#include "string-util.h"
4a0b58c4 71#include "user-util.h"
8a03c9ef 72#include "log.h"
d025f1e4 73
d025f1e4
ZJS
/* Once this many per-user journals are open, find_journal() closes one
 * before opening another. */
#define USER_JOURNALS_MAX 1024

#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* For how long determine_space_for() keeps returning its cached values
 * before measuring disk usage again. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
87
8580d1f7
LP
88static int determine_space_for(
89 Server *s,
90 JournalMetrics *metrics,
91 const char *path,
92 const char *name,
93 bool verbose,
94 bool patch_min_use,
95 uint64_t *available,
96 uint64_t *limit) {
97
98 uint64_t sum = 0, ss_avail, avail;
7fd1b19b 99 _cleanup_closedir_ DIR *d = NULL;
8580d1f7
LP
100 struct dirent *de;
101 struct statvfs ss;
102 const char *p;
d025f1e4 103 usec_t ts;
d025f1e4 104
8580d1f7
LP
105 assert(s);
106 assert(metrics);
107 assert(path);
108 assert(name);
d025f1e4 109
8580d1f7 110 ts = now(CLOCK_MONOTONIC);
d025f1e4 111
8580d1f7 112 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 113
8580d1f7
LP
114 if (available)
115 *available = s->cached_space_available;
116 if (limit)
117 *limit = s->cached_space_limit;
d025f1e4 118
d025f1e4 119 return 0;
8580d1f7 120 }
d025f1e4 121
8580d1f7 122 p = strjoina(path, SERVER_MACHINE_ID(s));
d025f1e4 123 d = opendir(p);
d025f1e4 124 if (!d)
8580d1f7 125 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
d025f1e4
ZJS
126
127 if (fstatvfs(dirfd(d), &ss) < 0)
8580d1f7 128 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
d025f1e4 129
8580d1f7 130 FOREACH_DIRENT_ALL(de, d, break) {
d025f1e4 131 struct stat st;
d025f1e4
ZJS
132
133 if (!endswith(de->d_name, ".journal") &&
134 !endswith(de->d_name, ".journal~"))
135 continue;
136
8580d1f7
LP
137 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
138 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
d025f1e4 139 continue;
8580d1f7 140 }
d025f1e4
ZJS
141
142 if (!S_ISREG(st.st_mode))
143 continue;
144
145 sum += (uint64_t) st.st_blocks * 512UL;
146 }
147
8a03c9ef 148 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
d025f1e4 154
8580d1f7
LP
155 if (patch_min_use)
156 metrics->min_use = MAX(metrics->min_use, sum);
348ced90 157
8580d1f7
LP
158 ss_avail = ss.f_bsize * ss.f_bavail;
159 avail = LESS_BY(ss_avail, metrics->keep_free);
348ced90 160
8580d1f7
LP
161 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
162 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
163 s->cached_space_timestamp = ts;
d025f1e4 164
670b110c
ZJS
165 if (verbose) {
166 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
8580d1f7 167 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
282c5c4e
ZJS
168 format_bytes(fb1, sizeof(fb1), sum);
169 format_bytes(fb2, sizeof(fb2), metrics->max_use);
170 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
171 format_bytes(fb4, sizeof(fb4), ss_avail);
172 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
173 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
670b110c
ZJS
174
175 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
282c5c4e
ZJS
176 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
177 name, path, fb1, fb5, fb6),
178 "JOURNAL_NAME=%s", name,
179 "JOURNAL_PATH=%s", path,
180 "CURRENT_USE=%"PRIu64, sum,
181 "CURRENT_USE_PRETTY=%s", fb1,
182 "MAX_USE=%"PRIu64, metrics->max_use,
183 "MAX_USE_PRETTY=%s", fb2,
184 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
185 "DISK_KEEP_FREE_PRETTY=%s", fb3,
186 "DISK_AVAILABLE=%"PRIu64, ss_avail,
187 "DISK_AVAILABLE_PRETTY=%s", fb4,
188 "LIMIT=%"PRIu64, s->cached_space_limit,
189 "LIMIT_PRETTY=%s", fb5,
190 "AVAILABLE=%"PRIu64, s->cached_space_available,
191 "AVAILABLE_PRETTY=%s", fb6,
8a03c9ef 192 NULL);
8580d1f7
LP
193 }
194
195 if (available)
196 *available = s->cached_space_available;
197 if (limit)
198 *limit = s->cached_space_limit;
199
200 return 1;
201}
202
203static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
204 JournalMetrics *metrics;
205 const char *path, *name;
206
207 assert(s);
208
209 if (s->system_journal) {
210 path = "/var/log/journal/";
211 metrics = &s->system_metrics;
212 name = "System journal";
213 } else {
214 path = "/run/log/journal/";
215 metrics = &s->runtime_metrics;
216 name = "Runtime journal";
670b110c
ZJS
217 }
218
8580d1f7 219 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
d025f1e4
ZJS
220}
221
5c3bde3f 222static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 223#ifdef HAVE_ACL
5c3bde3f 224 int r;
d025f1e4 225#endif
d025f1e4
ZJS
226 assert(f);
227
d025f1e4 228#ifdef HAVE_ACL
34c10968 229 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
230 return;
231
5c3bde3f
ZJS
232 r = add_acls_for_user(f->fd, uid);
233 if (r < 0)
234 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
235#endif
236}
237
7a24f3bf
VC
238static int open_journal(
239 Server *s,
240 bool reliably,
241 const char *fname,
242 int flags,
243 bool seal,
244 JournalMetrics *metrics,
7a24f3bf
VC
245 JournalFile **ret) {
246 int r;
e167d7fd 247 JournalFile *f;
7a24f3bf
VC
248
249 assert(s);
250 assert(fname);
251 assert(ret);
252
253 if (reliably)
b58c888f 254 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 255 else
b58c888f 256 r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
257 if (r < 0)
258 return r;
259
e167d7fd 260 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 261 if (r < 0) {
69a3a6fd 262 (void) journal_file_close(f);
7a24f3bf
VC
263 return r;
264 }
265
e167d7fd 266 *ret = f;
7a24f3bf
VC
267 return r;
268}
269
d025f1e4 270static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 271 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
272 int r;
273 JournalFile *f;
274 sd_id128_t machine;
275
276 assert(s);
277
278 /* We split up user logs only on /var, not on /run. If the
279 * runtime file is open, we write to it exclusively, in order
280 * to guarantee proper order as soon as we flush /run to
281 * /var and close the runtime file. */
282
283 if (s->runtime_journal)
284 return s->runtime_journal;
285
f7dc3ab9 286 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
287 return s->system_journal;
288
289 r = sd_id128_get_machine(&machine);
290 if (r < 0)
291 return s->system_journal;
292
4a0b58c4 293 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
294 if (f)
295 return f;
296
de0671ee
ZJS
297 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
298 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
299 return s->system_journal;
300
43cf8388 301 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 302 /* Too many open? Then let's close one */
43cf8388 303 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 304 assert(f);
69a3a6fd 305 (void) journal_file_close(f);
d025f1e4
ZJS
306 }
307
089ed40b 308 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
d025f1e4
ZJS
309 if (r < 0)
310 return s->system_journal;
311
5c3bde3f 312 server_add_acls(f, uid);
d025f1e4 313
4a0b58c4 314 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 315 if (r < 0) {
69a3a6fd 316 (void) journal_file_close(f);
d025f1e4
ZJS
317 return s->system_journal;
318 }
319
320 return f;
321}
322
ea69bd41
LP
323static int do_rotate(
324 Server *s,
325 JournalFile **f,
326 const char* name,
327 bool seal,
328 uint32_t uid) {
329
fc55baee
ZJS
330 int r;
331 assert(s);
332
333 if (!*f)
334 return -EINVAL;
335
b58c888f 336 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
337 if (r < 0)
338 if (*f)
ea69bd41 339 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 340 else
ea69bd41 341 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 342 else
5c3bde3f 343 server_add_acls(*f, uid);
2678031a 344
fc55baee
ZJS
345 return r;
346}
347
d025f1e4
ZJS
348void server_rotate(Server *s) {
349 JournalFile *f;
350 void *k;
351 Iterator i;
352 int r;
353
354 log_debug("Rotating...");
355
8580d1f7
LP
356 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
357 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 358
43cf8388 359 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 360 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 361 if (r >= 0)
43cf8388 362 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
363 else if (!f)
364 /* Old file has been closed and deallocated */
43cf8388 365 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 366 }
b58c888f
VC
367
368 /* Perform any deferred closes which aren't still offlining. */
369 SET_FOREACH(f, s->deferred_closes, i)
370 if (!journal_file_is_offlining(f)) {
371 (void) set_remove(s->deferred_closes, f);
372 (void) journal_file_close(f);
373 }
d025f1e4
ZJS
374}
375
26687bf8
OS
376void server_sync(Server *s) {
377 JournalFile *f;
26687bf8
OS
378 Iterator i;
379 int r;
380
26687bf8 381 if (s->system_journal) {
ac2e41f5 382 r = journal_file_set_offline(s->system_journal, false);
26687bf8 383 if (r < 0)
65089b82 384 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
385 }
386
65c1d46b 387 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 388 r = journal_file_set_offline(f, false);
26687bf8 389 if (r < 0)
65089b82 390 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
391 }
392
f9a810be
LP
393 if (s->sync_event_source) {
394 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
395 if (r < 0)
da927ba9 396 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 397 }
26687bf8
OS
398
399 s->sync_scheduled = false;
400}
401
ea69bd41
LP
402static void do_vacuum(
403 Server *s,
ea69bd41 404 JournalFile *f,
8580d1f7
LP
405 JournalMetrics *metrics,
406 const char *path,
407 const char *name,
408 bool verbose,
409 bool patch_min_use) {
ea69bd41
LP
410
411 const char *p;
8580d1f7 412 uint64_t limit;
63c8666b
ZJS
413 int r;
414
8580d1f7
LP
415 assert(s);
416 assert(metrics);
417 assert(path);
418 assert(name);
419
63c8666b
ZJS
420 if (!f)
421 return;
422
8580d1f7
LP
423 p = strjoina(path, SERVER_MACHINE_ID(s));
424
425 limit = metrics->max_use;
426 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
427
428 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 429 if (r < 0 && r != -ENOENT)
8580d1f7 430 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
63c8666b
ZJS
431}
432
8580d1f7
LP
433int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
434 assert(s);
d025f1e4
ZJS
435
436 log_debug("Vacuuming...");
437
438 s->oldest_file_usec = 0;
439
8580d1f7
LP
440 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
441 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
d025f1e4 442
8580d1f7
LP
443 s->cached_space_limit = 0;
444 s->cached_space_available = 0;
445 s->cached_space_timestamp = 0;
d025f1e4 446
8580d1f7 447 return 0;
d025f1e4
ZJS
448}
449
0c24bb23
LP
450static void server_cache_machine_id(Server *s) {
451 sd_id128_t id;
452 int r;
453
454 assert(s);
455
456 r = sd_id128_get_machine(&id);
457 if (r < 0)
458 return;
459
460 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
461}
462
463static void server_cache_boot_id(Server *s) {
464 sd_id128_t id;
465 int r;
466
467 assert(s);
468
469 r = sd_id128_get_boot(&id);
470 if (r < 0)
471 return;
472
473 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
474}
475
476static void server_cache_hostname(Server *s) {
477 _cleanup_free_ char *t = NULL;
478 char *x;
479
480 assert(s);
481
482 t = gethostname_malloc();
483 if (!t)
484 return;
485
486 x = strappend("_HOSTNAME=", t);
487 if (!x)
488 return;
489
490 free(s->hostname_field);
491 s->hostname_field = x;
492}
493
8531ae70 494static bool shall_try_append_again(JournalFile *f, int r) {
d025f1e4
ZJS
495
496 /* -E2BIG Hit configured limit
497 -EFBIG Hit fs limit
498 -EDQUOT Quota limit hit
499 -ENOSPC Disk full
fa6ac760 500 -EIO I/O error of some kind (mmap)
d025f1e4
ZJS
501 -EHOSTDOWN Other machine
502 -EBUSY Unclean shutdown
503 -EPROTONOSUPPORT Unsupported feature
504 -EBADMSG Corrupted
505 -ENODATA Truncated
2678031a
LP
506 -ESHUTDOWN Already archived
507 -EIDRM Journal file has been deleted */
d025f1e4
ZJS
508
509 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
510 log_debug("%s: Allocation limit reached, rotating.", f->path);
511 else if (r == -EHOSTDOWN)
512 log_info("%s: Journal file from other machine, rotating.", f->path);
513 else if (r == -EBUSY)
514 log_info("%s: Unclean shutdown, rotating.", f->path);
515 else if (r == -EPROTONOSUPPORT)
516 log_info("%s: Unsupported feature, rotating.", f->path);
517 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
518 log_warning("%s: Journal file corrupted, rotating.", f->path);
fa6ac760
LP
519 else if (r == -EIO)
520 log_warning("%s: IO error, rotating.", f->path);
2678031a
LP
521 else if (r == -EIDRM)
522 log_warning("%s: Journal file has been deleted, rotating.", f->path);
d025f1e4
ZJS
523 else
524 return false;
525
526 return true;
527}
528
d07f7b9e 529static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
530 JournalFile *f;
531 bool vacuumed = false;
532 int r;
533
534 assert(s);
535 assert(iovec);
536 assert(n > 0);
537
538 f = find_journal(s, uid);
539 if (!f)
540 return;
541
542 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
543 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
544 server_rotate(s);
8580d1f7 545 server_vacuum(s, false, false);
d025f1e4
ZJS
546 vacuumed = true;
547
548 f = find_journal(s, uid);
549 if (!f)
550 return;
551 }
552
553 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 554 if (r >= 0) {
d07f7b9e 555 server_schedule_sync(s, priority);
d025f1e4 556 return;
26687bf8 557 }
d025f1e4
ZJS
558
559 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 560 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
561 return;
562 }
563
564 server_rotate(s);
8580d1f7 565 server_vacuum(s, false, false);
d025f1e4
ZJS
566
567 f = find_journal(s, uid);
568 if (!f)
569 return;
570
571 log_debug("Retrying write.");
572 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
573 if (r < 0)
574 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
575 else
d07f7b9e 576 server_schedule_sync(s, priority);
d025f1e4
ZJS
577}
578
/* Appends the trusted metadata fields to an entry and writes it to the
 * journal.
 *
 * 'iovec' holds 'n' caller-supplied fields and has room for 'm' elements in
 * total. Fields are appended in place, in this order:
 *
 *   - if 'ucred' is set: _PID/_UID/_GID and whatever can be read about
 *     ucred->pid (comm, exe, cmdline, effective capabilities, audit data,
 *     cgroup-derived session/owner/unit/slice, SELinux context);
 *   - if 'object_pid' is non-zero: the matching OBJECT_* fields for that PID;
 *   - if 'tv' is set: _SOURCE_REALTIME_TIMESTAMP;
 *   - the cached boot ID, machine ID and hostname fields, where non-empty.
 *
 * 'unit_id' is used for _SYSTEMD_UNIT/_SYSTEMD_USER_UNIT only where the
 * cgroup path does not yield one. 'label'/'label_len' is a caller-supplied
 * SELinux label (not necessarily NUL-terminated; it is copied by length).
 * 'priority' is passed through to write_to_journal().
 *
 * Every lookup is best-effort: a failing one simply omits its field.
 *
 * The strings produced by strjoina() are never freed in this function and
 * have to stay valid until write_to_journal() below returns - presumably they
 * live on this function's stack like the explicit alloca() in the SELinux
 * branch; confirm in string-util.h before moving any of this into helpers. */
static void dispatch_message_real(
                Server *s,
                struct iovec *iovec, unsigned n, unsigned m,
                const struct ucred *ucred,
                const struct timeval *tv,
                const char *label, size_t label_len,
                const char *unit_id,
                int priority,
                pid_t object_pid) {

        /* Fixed-size buffers for the numeric fields */
        char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
                uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
                gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
                owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
                source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
                o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
                o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
                o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
        uid_t object_uid;
        gid_t object_gid;
        char *x;
        int r;
        char *t, *c;
        uid_t realuid = 0, owner = 0, journal_uid;
        bool owner_valid = false;
#ifdef HAVE_AUDIT
        char    audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
                audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
                o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
                o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];

        uint32_t audit;
        uid_t loginuid;
#endif

        assert(s);
        assert(iovec);
        assert(n > 0);
        /* The caller must have left room for all fields appended below */
        assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);

        if (ucred) {
                realuid = ucred->uid;

                sprintf(pid, "_PID="PID_FMT, ucred->pid);
                IOVEC_SET_STRING(iovec[n++], pid);

                sprintf(uid, "_UID="UID_FMT, ucred->uid);
                IOVEC_SET_STRING(iovec[n++], uid);

                sprintf(gid, "_GID="GID_FMT, ucred->gid);
                IOVEC_SET_STRING(iovec[n++], gid);

                /* In each lookup below 't' is a heap string handed back by
                 * the helper; it is copied into 'x' and freed right away. */
                r = get_process_comm(ucred->pid, &t);
                if (r >= 0) {
                        x = strjoina("_COMM=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_exe(ucred->pid, &t);
                if (r >= 0) {
                        x = strjoina("_EXE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_cmdline(ucred->pid, 0, false, &t);
                if (r >= 0) {
                        x = strjoina("_CMDLINE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_capeff(ucred->pid, &t);
                if (r >= 0) {
                        x = strjoina("_CAP_EFFECTIVE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

#ifdef HAVE_AUDIT
                r = audit_session_from_pid(ucred->pid, &audit);
                if (r >= 0) {
                        sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
                        IOVEC_SET_STRING(iovec[n++], audit_session);
                }

                r = audit_loginuid_from_pid(ucred->pid, &loginuid);
                if (r >= 0) {
                        sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
                        IOVEC_SET_STRING(iovec[n++], audit_loginuid);
                }
#endif

                r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
                if (r >= 0) {
                        /* Stays NULL unless the cgroup path maps to a session;
                         * decides below whether unit_id is a system or a user
                         * unit. */
                        char *session = NULL;

                        x = strjoina("_SYSTEMD_CGROUP=", c);
                        IOVEC_SET_STRING(iovec[n++], x);

                        r = cg_path_get_session(c, &t);
                        if (r >= 0) {
                                session = strjoina("_SYSTEMD_SESSION=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], session);
                        }

                        if (cg_path_get_owner_uid(c, &owner) >= 0) {
                                /* Remembered for the SPLIT_LOGIN decision at
                                 * the end of the function */
                                owner_valid = true;

                                sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
                                IOVEC_SET_STRING(iovec[n++], owner_uid);
                        }

                        if (cg_path_get_unit(c, &t) >= 0) {
                                x = strjoina("_SYSTEMD_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        } else if (unit_id && !session) {
                                x = strjoina("_SYSTEMD_UNIT=", unit_id);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        if (cg_path_get_user_unit(c, &t) >= 0) {
                                x = strjoina("_SYSTEMD_USER_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        } else if (unit_id && session) {
                                x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        if (cg_path_get_slice(c, &t) >= 0) {
                                x = strjoina("_SYSTEMD_SLICE=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        free(c);
                } else if (unit_id) {
                        /* No cgroup path available: fall back to the unit
                         * name the caller supplied */
                        x = strjoina("_SYSTEMD_UNIT=", unit_id);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

#ifdef HAVE_SELINUX
                if (mac_selinux_have()) {
                        if (label) {
                                x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);

                                /* Copy label_len bytes after the prefix and
                                 * terminate the result explicitly */
                                *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
                                IOVEC_SET_STRING(iovec[n++], x);
                        } else {
                                security_context_t con;

                                /* No label passed in: ask for the context of
                                 * the sending process instead */
                                if (getpidcon(ucred->pid, &con) >= 0) {
                                        x = strjoina("_SELINUX_CONTEXT=", con);

                                        freecon(con);
                                        IOVEC_SET_STRING(iovec[n++], x);
                                }
                        }
                }
#endif
        }
        assert(n <= m);

        if (object_pid) {
                r = get_process_uid(object_pid, &object_uid);
                if (r >= 0) {
                        sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
                        IOVEC_SET_STRING(iovec[n++], o_uid);
                }

                r = get_process_gid(object_pid, &object_gid);
                if (r >= 0) {
                        sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
                        IOVEC_SET_STRING(iovec[n++], o_gid);
                }

                r = get_process_comm(object_pid, &t);
                if (r >= 0) {
                        x = strjoina("OBJECT_COMM=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_exe(object_pid, &t);
                if (r >= 0) {
                        x = strjoina("OBJECT_EXE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

                r = get_process_cmdline(object_pid, 0, false, &t);
                if (r >= 0) {
                        x = strjoina("OBJECT_CMDLINE=", t);
                        free(t);
                        IOVEC_SET_STRING(iovec[n++], x);
                }

#ifdef HAVE_AUDIT
                r = audit_session_from_pid(object_pid, &audit);
                if (r >= 0) {
                        sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
                        IOVEC_SET_STRING(iovec[n++], o_audit_session);
                }

                r = audit_loginuid_from_pid(object_pid, &loginuid);
                if (r >= 0) {
                        sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
                        IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
                }
#endif

                r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
                if (r >= 0) {
                        x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
                        IOVEC_SET_STRING(iovec[n++], x);

                        r = cg_path_get_session(c, &t);
                        if (r >= 0) {
                                x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        /* NOTE(review): this writes into the same 'owner'
                         * variable that the SPLIT_LOGIN decision at the end
                         * of the function reads, replacing the sender's owner
                         * UID with the object's. Whether object_pid can be
                         * set together with a non-root sender is decided by
                         * the callers, which are not visible here - confirm. */
                        if (cg_path_get_owner_uid(c, &owner) >= 0) {
                                sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
                                IOVEC_SET_STRING(iovec[n++], o_owner_uid);
                        }

                        if (cg_path_get_unit(c, &t) >= 0) {
                                x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        if (cg_path_get_user_unit(c, &t) >= 0) {
                                x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
                                free(t);
                                IOVEC_SET_STRING(iovec[n++], x);
                        }

                        free(c);
                }
        }
        assert(n <= m);

        if (tv) {
                sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
                IOVEC_SET_STRING(iovec[n++], source_time);
        }

        /* Note that strictly speaking storing the boot id here is
         * redundant since the entry includes this in-line
         * anyway. However, we need this indexed, too. */
        if (!isempty(s->boot_id_field))
                IOVEC_SET_STRING(iovec[n++], s->boot_id_field);

        if (!isempty(s->machine_id_field))
                IOVEC_SET_STRING(iovec[n++], s->machine_id_field);

        if (!isempty(s->hostname_field))
                IOVEC_SET_STRING(iovec[n++], s->hostname_field);

        assert(n <= m);

        /* Select the UID whose journal receives the entry; 0 is used whenever
         * neither split rule applies. */
        if (s->split_mode == SPLIT_UID && realuid > 0)
                /* Split up strictly by any UID */
                journal_uid = realuid;
        else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
                /* Split up by login UIDs. We do this only if the
                 * realuid is not root, in order not to accidentally
                 * leak privileged information to the user that is
                 * logged by a privileged process that is part of an
                 * unprivileged session. */
                journal_uid = owner;
        else
                journal_uid = 0;

        write_to_journal(s, journal_uid, iovec, n, priority);
}
862
863void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
864 char mid[11 + 32 + 1];
8a03c9ef
ZJS
865 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
866 unsigned n = 0, m;
32917e33 867 int r;
d025f1e4 868 va_list ap;
b92bea5d 869 struct ucred ucred = {};
d025f1e4
ZJS
870
871 assert(s);
872 assert(format);
873
4850d39a 874 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
875 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
876 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
877
d025f1e4 878 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 879 assert_cc(6 == LOG_INFO);
32917e33 880 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4
ZJS
881
882 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
e2cc6eca 883 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
884 IOVEC_SET_STRING(iovec[n++], mid);
885 }
886
8a03c9ef
ZJS
887 m = n;
888
889 va_start(ap, format);
32917e33
ZJS
890 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
891 /* Error handling below */
8a03c9ef
ZJS
892 va_end(ap);
893
d025f1e4
ZJS
894 ucred.pid = getpid();
895 ucred.uid = getuid();
896 ucred.gid = getgid();
897
32917e33
ZJS
898 if (r >= 0)
899 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
900
901 while (m < n)
902 free(iovec[m++].iov_base);
32917e33
ZJS
903
904 if (r < 0) {
905 /* We failed to format the message. Emit a warning instead. */
906 char buf[LINE_MAX];
907
908 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
909
910 n = 3;
911 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
912 IOVEC_SET_STRING(iovec[n++], buf);
913 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
914 }
d025f1e4
ZJS
915}
916
917void server_dispatch_message(
918 Server *s,
919 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
920 const struct ucred *ucred,
921 const struct timeval *tv,
d025f1e4
ZJS
922 const char *label, size_t label_len,
923 const char *unit_id,
968f3196
ZJS
924 int priority,
925 pid_t object_pid) {
d025f1e4 926
7027ff61 927 int rl, r;
7fd1b19b 928 _cleanup_free_ char *path = NULL;
8580d1f7 929 uint64_t available = 0;
db91ea32 930 char *c;
d025f1e4
ZJS
931
932 assert(s);
933 assert(iovec || n == 0);
934
935 if (n == 0)
936 return;
937
938 if (LOG_PRI(priority) > s->max_level_store)
939 return;
940
2f5df74a
HHPF
941 /* Stop early in case the information will not be stored
942 * in a journal. */
943 if (s->storage == STORAGE_NONE)
944 return;
945
d025f1e4
ZJS
946 if (!ucred)
947 goto finish;
948
e9174f29 949 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 950 if (r < 0)
d025f1e4
ZJS
951 goto finish;
952
953 /* example: /user/lennart/3/foobar
954 * /system/dbus.service/foobar
955 *
956 * So let's cut of everything past the third /, since that is
957 * where user directories start */
958
959 c = strchr(path, '/');
960 if (c) {
961 c = strchr(c+1, '/');
962 if (c) {
963 c = strchr(c+1, '/');
964 if (c)
965 *c = 0;
966 }
967 }
968
8580d1f7
LP
969 (void) determine_space(s, false, false, &available, NULL);
970 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 971 if (rl == 0)
d025f1e4 972 return;
d025f1e4
ZJS
973
974 /* Write a suppression message if we suppressed something */
975 if (rl > 1)
db91ea32 976 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
977 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
978 NULL);
d025f1e4
ZJS
979
980finish:
d07f7b9e 981 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
982}
983
984
caa2f4c0 985static int system_journal_open(Server *s, bool flush_requested) {
84267e40 986 const char *fn;
09eba4d4 987 int r = 0;
d025f1e4
ZJS
988
989 if (!s->system_journal &&
990 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
caa2f4c0
ZJS
991 (flush_requested
992 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
d025f1e4
ZJS
993
994 /* If in auto mode: first try to create the machine
995 * path, but not the prefix.
996 *
997 * If in persistent mode: create /var/log/journal and
998 * the machine path */
999
1000 if (s->storage == STORAGE_PERSISTENT)
ac892057 1001 (void) mkdir_p("/var/log/journal/", 0755);
d025f1e4 1002
8580d1f7 1003 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
d025f1e4 1004 (void) mkdir(fn, 0755);
d025f1e4 1005
63c372cb 1006 fn = strjoina(fn, "/system.journal");
089ed40b 1007 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
8580d1f7 1008 if (r >= 0) {
5c3bde3f 1009 server_add_acls(s->system_journal, 0);
8580d1f7
LP
1010 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
1011 } else if (r < 0) {
433dd100 1012 if (r != -ENOENT && r != -EROFS)
da927ba9 1013 log_warning_errno(r, "Failed to open system journal: %m");
e40ec7ae 1014
433dd100
LN
1015 r = 0;
1016 }
d025f1e4
ZJS
1017 }
1018
1019 if (!s->runtime_journal &&
1020 (s->storage != STORAGE_NONE)) {
1021
8580d1f7 1022 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
d025f1e4
ZJS
1023
1024 if (s->system_journal) {
1025
1026 /* Try to open the runtime journal, but only
1027 * if it already exists, so that we can flush
1028 * it into the system journal */
1029
089ed40b 1030 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
d025f1e4
ZJS
1031 if (r < 0) {
1032 if (r != -ENOENT)
da927ba9 1033 log_warning_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1034
1035 r = 0;
1036 }
1037
1038 } else {
1039
1040 /* OK, we really need the runtime journal, so create
1041 * it if necessary. */
1042
fc1d70af
LP
1043 (void) mkdir("/run/log", 0755);
1044 (void) mkdir("/run/log/journal", 0755);
1045 (void) mkdir_parents(fn, 0750);
1046
089ed40b 1047 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
23bbb0de
MS
1048 if (r < 0)
1049 return log_error_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1050 }
1051
8580d1f7 1052 if (s->runtime_journal) {
5c3bde3f 1053 server_add_acls(s->runtime_journal, 0);
8580d1f7
LP
1054 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1055 }
d025f1e4
ZJS
1056 }
1057
1058 return r;
1059}
1060
1061int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1062 sd_id128_t machine;
1063 sd_journal *j = NULL;
fbb63411
LP
1064 char ts[FORMAT_TIMESPAN_MAX];
1065 usec_t start;
1066 unsigned n = 0;
1067 int r;
d025f1e4
ZJS
1068
1069 assert(s);
1070
1071 if (s->storage != STORAGE_AUTO &&
1072 s->storage != STORAGE_PERSISTENT)
1073 return 0;
1074
1075 if (!s->runtime_journal)
1076 return 0;
1077
8580d1f7 1078 (void) system_journal_open(s, true);
d025f1e4
ZJS
1079
1080 if (!s->system_journal)
1081 return 0;
1082
1083 log_debug("Flushing to /var...");
1084
fbb63411
LP
1085 start = now(CLOCK_MONOTONIC);
1086
d025f1e4 1087 r = sd_id128_get_machine(&machine);
00a16861 1088 if (r < 0)
d025f1e4 1089 return r;
d025f1e4
ZJS
1090
1091 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1092 if (r < 0)
1093 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1094
93b73b06
LP
1095 sd_journal_set_data_threshold(j, 0);
1096
d025f1e4
ZJS
1097 SD_JOURNAL_FOREACH(j) {
1098 Object *o = NULL;
1099 JournalFile *f;
1100
1101 f = j->current_file;
1102 assert(f && f->current_offset > 0);
1103
fbb63411
LP
1104 n++;
1105
d025f1e4
ZJS
1106 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1107 if (r < 0) {
da927ba9 1108 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1109 goto finish;
1110 }
1111
1112 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1113 if (r >= 0)
1114 continue;
1115
1116 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1117 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1118 goto finish;
1119 }
1120
1121 server_rotate(s);
8580d1f7 1122 server_vacuum(s, false, false);
d025f1e4 1123
253f59df
LP
1124 if (!s->system_journal) {
1125 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1126 r = -EIO;
1127 goto finish;
1128 }
1129
d025f1e4
ZJS
1130 log_debug("Retrying write.");
1131 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1132 if (r < 0) {
da927ba9 1133 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1134 goto finish;
1135 }
1136 }
1137
804ae586
LP
1138 r = 0;
1139
d025f1e4
ZJS
1140finish:
1141 journal_file_post_change(s->system_journal);
1142
804ae586 1143 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1144
1145 if (r >= 0)
c6878637 1146 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1147
763c7aa2 1148 sd_journal_close(j);
d025f1e4 1149
8a03c9ef
ZJS
1150 server_driver_message(s, SD_ID128_NULL,
1151 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1152 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1153 n),
1154 NULL);
fbb63411 1155
d025f1e4
ZJS
1156 return r;
1157}
1158
8531ae70 1159int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1160 Server *s = userdata;
a315ac4e
LP
1161 struct ucred *ucred = NULL;
1162 struct timeval *tv = NULL;
1163 struct cmsghdr *cmsg;
1164 char *label = NULL;
1165 size_t label_len = 0, m;
1166 struct iovec iovec;
1167 ssize_t n;
1168 int *fds = NULL, v = 0;
1169 unsigned n_fds = 0;
1170
1171 union {
1172 struct cmsghdr cmsghdr;
1173
1174 /* We use NAME_MAX space for the SELinux label
1175 * here. The kernel currently enforces no
1176 * limit, but according to suggestions from
1177 * the SELinux people this will change and it
1178 * will probably be identical to NAME_MAX. For
1179 * now we use that, but this should be updated
1180 * one day when the final limit is known. */
1181 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1182 CMSG_SPACE(sizeof(struct timeval)) +
1183 CMSG_SPACE(sizeof(int)) + /* fd */
1184 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1185 } control = {};
1186
1187 union sockaddr_union sa = {};
1188
1189 struct msghdr msghdr = {
1190 .msg_iov = &iovec,
1191 .msg_iovlen = 1,
1192 .msg_control = &control,
1193 .msg_controllen = sizeof(control),
1194 .msg_name = &sa,
1195 .msg_namelen = sizeof(sa),
1196 };
f9a810be 1197
d025f1e4 1198 assert(s);
875c2e22 1199 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1200
1201 if (revents != EPOLLIN) {
1202 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1203 return -EIO;
1204 }
1205
a315ac4e
LP
1206 /* Try to get the right size, if we can. (Not all
1207 * sockets support SIOCINQ, hence we just try, but
1208 * don't rely on it. */
1209 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1210
a315ac4e
LP
1211 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1212 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1213 (size_t) LINE_MAX,
1214 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1215
a315ac4e
LP
1216 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1217 return log_oom();
875c2e22 1218
a315ac4e
LP
1219 iovec.iov_base = s->buffer;
1220 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1221
a315ac4e
LP
1222 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1223 if (n < 0) {
1224 if (errno == EINTR || errno == EAGAIN)
1225 return 0;
875c2e22 1226
a315ac4e
LP
1227 return log_error_errno(errno, "recvmsg() failed: %m");
1228 }
875c2e22 1229
a315ac4e
LP
1230 CMSG_FOREACH(cmsg, &msghdr) {
1231
1232 if (cmsg->cmsg_level == SOL_SOCKET &&
1233 cmsg->cmsg_type == SCM_CREDENTIALS &&
1234 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1235 ucred = (struct ucred*) CMSG_DATA(cmsg);
1236 else if (cmsg->cmsg_level == SOL_SOCKET &&
1237 cmsg->cmsg_type == SCM_SECURITY) {
1238 label = (char*) CMSG_DATA(cmsg);
1239 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1240 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1241 cmsg->cmsg_type == SO_TIMESTAMP &&
1242 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1243 tv = (struct timeval*) CMSG_DATA(cmsg);
1244 else if (cmsg->cmsg_level == SOL_SOCKET &&
1245 cmsg->cmsg_type == SCM_RIGHTS) {
1246 fds = (int*) CMSG_DATA(cmsg);
1247 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1248 }
a315ac4e 1249 }
d025f1e4 1250
a315ac4e
LP
1251 /* And a trailing NUL, just in case */
1252 s->buffer[n] = 0;
1253
1254 if (fd == s->syslog_fd) {
1255 if (n > 0 && n_fds == 0)
1256 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1257 else if (n_fds > 0)
1258 log_warning("Got file descriptors via syslog socket. Ignoring.");
1259
1260 } else if (fd == s->native_fd) {
1261 if (n > 0 && n_fds == 0)
1262 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1263 else if (n == 0 && n_fds == 1)
1264 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1265 else if (n_fds > 0)
1266 log_warning("Got too many file descriptors via native socket. Ignoring.");
1267
1268 } else {
1269 assert(fd == s->audit_fd);
1270
1271 if (n > 0 && n_fds == 0)
1272 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1273 else if (n_fds > 0)
1274 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1275 }
a315ac4e
LP
1276
1277 close_many(fds, n_fds);
1278 return 0;
f9a810be 1279}
d025f1e4 1280
f9a810be
LP
1281static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1282 Server *s = userdata;
33d52ab9 1283 int r;
d025f1e4 1284
f9a810be 1285 assert(s);
d025f1e4 1286
94b65516 1287 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1288
f9a810be
LP
1289 server_flush_to_var(s);
1290 server_sync(s);
8580d1f7 1291 server_vacuum(s, false, false);
d025f1e4 1292
33d52ab9
LP
1293 r = touch("/run/systemd/journal/flushed");
1294 if (r < 0)
1295 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1296
f9a810be
LP
1297 return 0;
1298}
d025f1e4 1299
f9a810be
LP
1300static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1301 Server *s = userdata;
33d52ab9 1302 int r;
d025f1e4 1303
f9a810be 1304 assert(s);
d025f1e4 1305
94b65516 1306 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1307 server_rotate(s);
8580d1f7 1308 server_vacuum(s, true, true);
d025f1e4 1309
dbd6e31c 1310 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1311 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1312 if (r < 0)
1313 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1314
f9a810be
LP
1315 return 0;
1316}
d025f1e4 1317
f9a810be
LP
1318static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1319 Server *s = userdata;
d025f1e4 1320
f9a810be 1321 assert(s);
d025f1e4 1322
4daf54a8 1323 log_received_signal(LOG_INFO, si);
d025f1e4 1324
6203e07a 1325 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1326 return 0;
1327}
1328
94b65516
LP
1329static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1330 Server *s = userdata;
33d52ab9 1331 int r;
94b65516
LP
1332
1333 assert(s);
1334
1335 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1336
1337 server_sync(s);
1338
1339 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1340 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1341 if (r < 0)
1342 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1343
1344 return 0;
1345}
1346
f9a810be 1347static int setup_signals(Server *s) {
f9a810be 1348 int r;
d025f1e4
ZJS
1349
1350 assert(s);
1351
94b65516 1352 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1353
151b9b96 1354 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1355 if (r < 0)
1356 return r;
1357
151b9b96 1358 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1359 if (r < 0)
1360 return r;
d025f1e4 1361
151b9b96 1362 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1363 if (r < 0)
1364 return r;
d025f1e4 1365
b374689c
LP
1366 /* Let's process SIGTERM late, so that we flush all queued
1367 * messages to disk before we exit */
1368 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1369 if (r < 0)
1370 return r;
1371
1372 /* When journald is invoked on the terminal (when debugging),
1373 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1374 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1375 if (r < 0)
1376 return r;
d025f1e4 1377
b374689c
LP
1378 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1379 if (r < 0)
1380 return r;
1381
94b65516
LP
1382 /* SIGRTMIN+1 causes an immediate sync. We process this very
1383 * late, so that everything else queued at this point is
1384 * really written to disk. Clients can watch
1385 * /run/systemd/journal/synced with inotify until its mtime
1386 * changes to see when a sync happened. */
1387 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1388 if (r < 0)
1389 return r;
1390
1391 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1392 if (r < 0)
1393 return r;
1394
d025f1e4
ZJS
1395 return 0;
1396}
1397
1398static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1399 _cleanup_free_ char *line = NULL;
d581d9d9 1400 const char *p;
74df0fca 1401 int r;
d025f1e4 1402
74df0fca 1403 r = proc_cmdline(&line);
b5884878 1404 if (r < 0) {
da927ba9 1405 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1406 return 0;
b5884878 1407 }
d025f1e4 1408
d581d9d9 1409 p = line;
9ed794a3 1410 for (;;) {
ff82c36c 1411 _cleanup_free_ char *word = NULL;
d025f1e4 1412
d581d9d9
SS
1413 r = extract_first_word(&p, &word, NULL, 0);
1414 if (r < 0)
1415 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1416
1417 if (r == 0)
1418 break;
d025f1e4
ZJS
1419
1420 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1421 r = parse_boolean(word + 35);
1422 if (r < 0)
1423 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1424 else
1425 s->forward_to_syslog = r;
1426 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1427 r = parse_boolean(word + 33);
1428 if (r < 0)
1429 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1430 else
1431 s->forward_to_kmsg = r;
1432 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1433 r = parse_boolean(word + 36);
1434 if (r < 0)
1435 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1436 else
1437 s->forward_to_console = r;
40b71e89
ST
1438 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1439 r = parse_boolean(word + 33);
1440 if (r < 0)
1441 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1442 else
1443 s->forward_to_wall = r;
d025f1e4
ZJS
1444 } else if (startswith(word, "systemd.journald"))
1445 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1446 }
1447
804ae586 1448 /* do not warn about state here, since probably systemd already did */
db91ea32 1449 return 0;
d025f1e4
ZJS
1450}
1451
1452static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1453 assert(s);
1454
75eb6154
LP
1455 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1456 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1457 "Journal\0",
1458 config_item_perf_lookup, journald_gperf_lookup,
1459 false, s);
d025f1e4
ZJS
1460}
1461
f9a810be
LP
1462static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1463 Server *s = userdata;
26687bf8
OS
1464
1465 assert(s);
1466
f9a810be 1467 server_sync(s);
26687bf8
OS
1468 return 0;
1469}
1470
d07f7b9e 1471int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1472 int r;
1473
26687bf8
OS
1474 assert(s);
1475
d07f7b9e
LP
1476 if (priority <= LOG_CRIT) {
1477 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1478 server_sync(s);
1479 return 0;
1480 }
1481
26687bf8
OS
1482 if (s->sync_scheduled)
1483 return 0;
1484
f9a810be
LP
1485 if (s->sync_interval_usec > 0) {
1486 usec_t when;
ca267016 1487
6a0f1f6d 1488 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1489 if (r < 0)
1490 return r;
26687bf8 1491
f9a810be
LP
1492 when += s->sync_interval_usec;
1493
1494 if (!s->sync_event_source) {
6a0f1f6d
LP
1495 r = sd_event_add_time(
1496 s->event,
1497 &s->sync_event_source,
1498 CLOCK_MONOTONIC,
1499 when, 0,
1500 server_dispatch_sync, s);
f9a810be
LP
1501 if (r < 0)
1502 return r;
1503
1504 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1505 } else {
1506 r = sd_event_source_set_time(s->sync_event_source, when);
1507 if (r < 0)
1508 return r;
1509
1510 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1511 }
26687bf8 1512 if (r < 0)
f9a810be 1513 return r;
26687bf8 1514
f9a810be
LP
1515 s->sync_scheduled = true;
1516 }
26687bf8
OS
1517
1518 return 0;
1519}
1520
0c24bb23
LP
1521static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1522 Server *s = userdata;
1523
1524 assert(s);
1525
1526 server_cache_hostname(s);
1527 return 0;
1528}
1529
1530static int server_open_hostname(Server *s) {
1531 int r;
1532
1533 assert(s);
1534
1535 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1536 if (s->hostname_fd < 0)
1537 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1538
151b9b96 1539 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1540 if (r < 0) {
28def94c
DR
1541 /* kernels prior to 3.2 don't support polling this file. Ignore
1542 * the failure. */
1543 if (r == -EPERM) {
e53fc357 1544 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1545 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1546 return 0;
1547 }
1548
23bbb0de 1549 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1550 }
1551
1552 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1553 if (r < 0)
1554 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1555
1556 return 0;
1557}
1558
e22aa3d3
LP
1559static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1560 Server *s = userdata;
1561 int r;
1562
1563 assert(s);
1564 assert(s->notify_event_source == es);
1565 assert(s->notify_fd == fd);
1566
e22aa3d3 1567 /* The $NOTIFY_SOCKET is writable again, now send exactly one
119e9655
LP
1568 * message on it. Either it's the wtachdog event, the initial
1569 * READY=1 event or an stdout stream event. If there's nothing
1570 * to write anymore, turn our event source off. The next time
1571 * there's something to send it will be turned on again. */
e22aa3d3
LP
1572
1573 if (!s->sent_notify_ready) {
1574 static const char p[] =
1575 "READY=1\n"
1576 "STATUS=Processing requests...";
1577 ssize_t l;
1578
1579 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1580 if (l < 0) {
1581 if (errno == EAGAIN)
1582 return 0;
1583
1584 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1585 }
1586
1587 s->sent_notify_ready = true;
1588 log_debug("Sent READY=1 notification.");
1589
119e9655
LP
1590 } else if (s->send_watchdog) {
1591
1592 static const char p[] =
1593 "WATCHDOG=1";
1594
1595 ssize_t l;
1596
1597 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1598 if (l < 0) {
1599 if (errno == EAGAIN)
1600 return 0;
1601
1602 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1603 }
1604
1605 s->send_watchdog = false;
1606 log_debug("Sent WATCHDOG=1 notification.");
1607
e22aa3d3
LP
1608 } else if (s->stdout_streams_notify_queue)
1609 /* Dispatch one stream notification event */
1610 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1611
1612 /* Leave us enabled if there's still more to to do. */
119e9655 1613 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1614 return 0;
1615
1616 /* There was nothing to do anymore, let's turn ourselves off. */
1617 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1618 if (r < 0)
1619 return log_error_errno(r, "Failed to turn off notify event source: %m");
1620
1621 return 0;
1622}
1623
119e9655
LP
1624static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1625 Server *s = userdata;
1626 int r;
1627
1628 assert(s);
1629
1630 s->send_watchdog = true;
1631
1632 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1633 if (r < 0)
1634 log_warning_errno(r, "Failed to turn on notify event source: %m");
1635
1636 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1637 if (r < 0)
1638 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1639
1640 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1641 if (r < 0)
1642 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1643
1644 return 0;
1645}
1646
e22aa3d3
LP
1647static int server_connect_notify(Server *s) {
1648 union sockaddr_union sa = {
1649 .un.sun_family = AF_UNIX,
1650 };
1651 const char *e;
1652 int r;
1653
1654 assert(s);
1655 assert(s->notify_fd < 0);
1656 assert(!s->notify_event_source);
1657
1658 /*
1659 So here's the problem: we'd like to send notification
1660 messages to PID 1, but we cannot do that via sd_notify(),
1661 since that's synchronous, and we might end up blocking on
1662 it. Specifically: given that PID 1 might block on
1663 dbus-daemon during IPC, and dbus-daemon is logging to us,
1664 and might hence block on us, we might end up in a deadlock
1665 if we block on sending PID 1 notification messages -- by
1666 generating a full blocking circle. To avoid this, let's
1667 create a non-blocking socket, and connect it to the
1668 notification socket, and then wait for POLLOUT before we
1669 send anything. This should efficiently avoid any deadlocks,
1670 as we'll never block on PID 1, hence PID 1 can safely block
1671 on dbus-daemon which can safely block on us again.
1672
1673 Don't think that this issue is real? It is, see:
1674 https://github.com/systemd/systemd/issues/1505
1675 */
1676
1677 e = getenv("NOTIFY_SOCKET");
1678 if (!e)
1679 return 0;
1680
1681 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1682 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1683 return -EINVAL;
1684 }
1685
1686 if (strlen(e) > sizeof(sa.un.sun_path)) {
1687 log_error("NOTIFY_SOCKET path too long: %s", e);
1688 return -EINVAL;
1689 }
1690
1691 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1692 if (s->notify_fd < 0)
1693 return log_error_errno(errno, "Failed to create notify socket: %m");
1694
1695 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1696
1697 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1698 if (sa.un.sun_path[0] == '@')
1699 sa.un.sun_path[0] = 0;
1700
1701 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1702 if (r < 0)
1703 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1704
1705 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1706 if (r < 0)
1707 return log_error_errno(r, "Failed to watch notification socket: %m");
1708
119e9655
LP
1709 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1710 s->send_watchdog = true;
1711
4de2402b 1712 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1713 if (r < 0)
1714 return log_error_errno(r, "Failed to add watchdog time event: %m");
1715 }
1716
e22aa3d3
LP
1717 /* This should fire pretty soon, which we'll use to send the
1718 * READY=1 event. */
1719
1720 return 0;
1721}
1722
d025f1e4 1723int server_init(Server *s) {
13790add 1724 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1725 int n, r, fd;
7d18d348 1726 bool no_sockets;
d025f1e4
ZJS
1727
1728 assert(s);
1729
1730 zero(*s);
e22aa3d3 1731 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1732 s->compress = true;
1733 s->seal = true;
1734
119e9655
LP
1735 s->watchdog_usec = USEC_INFINITY;
1736
26687bf8
OS
1737 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1738 s->sync_scheduled = false;
1739
d025f1e4
ZJS
1740 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1741 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1742
40b71e89 1743 s->forward_to_wall = true;
d025f1e4 1744
e150e820
MB
1745 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1746
d025f1e4
ZJS
1747 s->max_level_store = LOG_DEBUG;
1748 s->max_level_syslog = LOG_DEBUG;
1749 s->max_level_kmsg = LOG_NOTICE;
1750 s->max_level_console = LOG_INFO;
40b71e89 1751 s->max_level_wall = LOG_EMERG;
d025f1e4 1752
8580d1f7
LP
1753 journal_reset_metrics(&s->system_metrics);
1754 journal_reset_metrics(&s->runtime_metrics);
d025f1e4
ZJS
1755
1756 server_parse_config_file(s);
1757 server_parse_proc_cmdline(s);
8580d1f7 1758
d288f79f 1759 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1760 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1761 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1762 s->rate_limit_interval = s->rate_limit_burst = 0;
1763 }
d025f1e4 1764
8580d1f7 1765 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1766
43cf8388 1767 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1768 if (!s->user_journals)
1769 return log_oom();
1770
1771 s->mmap = mmap_cache_new();
1772 if (!s->mmap)
1773 return log_oom();
1774
b58c888f
VC
1775 s->deferred_closes = set_new(NULL);
1776 if (!s->deferred_closes)
1777 return log_oom();
1778
f9a810be 1779 r = sd_event_default(&s->event);
23bbb0de
MS
1780 if (r < 0)
1781 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1782
1783 n = sd_listen_fds(true);
23bbb0de
MS
1784 if (n < 0)
1785 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1786
1787 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1788
1789 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1790
1791 if (s->native_fd >= 0) {
1792 log_error("Too many native sockets passed.");
1793 return -EINVAL;
1794 }
1795
1796 s->native_fd = fd;
1797
1798 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1799
1800 if (s->stdout_fd >= 0) {
1801 log_error("Too many stdout sockets passed.");
1802 return -EINVAL;
1803 }
1804
1805 s->stdout_fd = fd;
1806
03ee5c38
LP
1807 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1808 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1809
1810 if (s->syslog_fd >= 0) {
1811 log_error("Too many /dev/log sockets passed.");
1812 return -EINVAL;
1813 }
1814
1815 s->syslog_fd = fd;
1816
875c2e22
LP
1817 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1818
1819 if (s->audit_fd >= 0) {
1820 log_error("Too many audit sockets passed.");
1821 return -EINVAL;
1822 }
1823
1824 s->audit_fd = fd;
1825
4ec3cd73 1826 } else {
4ec3cd73 1827
13790add
LP
1828 if (!fds) {
1829 fds = fdset_new();
1830 if (!fds)
1831 return log_oom();
1832 }
4ec3cd73 1833
13790add
LP
1834 r = fdset_put(fds, fd);
1835 if (r < 0)
1836 return log_oom();
4ec3cd73 1837 }
d025f1e4
ZJS
1838 }
1839
15d91bff
ZJS
1840 /* Try to restore streams, but don't bother if this fails */
1841 (void) server_restore_streams(s, fds);
d025f1e4 1842
13790add
LP
1843 if (fdset_size(fds) > 0) {
1844 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1845 fds = fdset_free(fds);
1846 }
1847
7d18d348
ZJS
1848 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1849
1850 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1851
1852 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1853 r = server_open_stdout_socket(s);
1854 if (r < 0)
1855 return r;
1856
37b7affe 1857 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1858 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1859 if (r < 0)
1860 return r;
1861
37b7affe 1862 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1863 r = server_open_native_socket(s);
d025f1e4
ZJS
1864 if (r < 0)
1865 return r;
1866
37b7affe 1867 /* /dev/ksmg */
d025f1e4
ZJS
1868 r = server_open_dev_kmsg(s);
1869 if (r < 0)
1870 return r;
1871
7d18d348
ZJS
1872 /* Unless we got *some* sockets and not audit, open audit socket */
1873 if (s->audit_fd >= 0 || no_sockets) {
1874 r = server_open_audit(s);
1875 if (r < 0)
1876 return r;
1877 }
875c2e22 1878
d025f1e4
ZJS
1879 r = server_open_kernel_seqnum(s);
1880 if (r < 0)
1881 return r;
1882
0c24bb23
LP
1883 r = server_open_hostname(s);
1884 if (r < 0)
1885 return r;
1886
f9a810be 1887 r = setup_signals(s);
d025f1e4
ZJS
1888 if (r < 0)
1889 return r;
1890
1891 s->udev = udev_new();
1892 if (!s->udev)
1893 return -ENOMEM;
1894
f9a810be 1895 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1896 if (!s->rate_limit)
1897 return -ENOMEM;
1898
e9174f29
LP
1899 r = cg_get_root_path(&s->cgroup_root);
1900 if (r < 0)
1901 return r;
1902
0c24bb23
LP
1903 server_cache_hostname(s);
1904 server_cache_boot_id(s);
1905 server_cache_machine_id(s);
1906
e22aa3d3
LP
1907 (void) server_connect_notify(s);
1908
804ae586 1909 return system_journal_open(s, false);
d025f1e4
ZJS
1910}
1911
1912void server_maybe_append_tags(Server *s) {
1913#ifdef HAVE_GCRYPT
1914 JournalFile *f;
1915 Iterator i;
1916 usec_t n;
1917
1918 n = now(CLOCK_REALTIME);
1919
1920 if (s->system_journal)
1921 journal_file_maybe_append_tag(s->system_journal, n);
1922
43cf8388 1923 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1924 journal_file_maybe_append_tag(f, n);
1925#endif
1926}
1927
1928void server_done(Server *s) {
1929 JournalFile *f;
1930 assert(s);
1931
b58c888f
VC
1932 if (s->deferred_closes) {
1933 journal_file_close_set(s->deferred_closes);
1934 set_free(s->deferred_closes);
1935 }
1936
d025f1e4
ZJS
1937 while (s->stdout_streams)
1938 stdout_stream_free(s->stdout_streams);
1939
1940 if (s->system_journal)
69a3a6fd 1941 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1942
1943 if (s->runtime_journal)
69a3a6fd 1944 (void) journal_file_close(s->runtime_journal);
d025f1e4 1945
43cf8388 1946 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1947 (void) journal_file_close(f);
d025f1e4 1948
43cf8388 1949 ordered_hashmap_free(s->user_journals);
d025f1e4 1950
f9a810be
LP
1951 sd_event_source_unref(s->syslog_event_source);
1952 sd_event_source_unref(s->native_event_source);
1953 sd_event_source_unref(s->stdout_event_source);
1954 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1955 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1956 sd_event_source_unref(s->sync_event_source);
1957 sd_event_source_unref(s->sigusr1_event_source);
1958 sd_event_source_unref(s->sigusr2_event_source);
1959 sd_event_source_unref(s->sigterm_event_source);
1960 sd_event_source_unref(s->sigint_event_source);
94b65516 1961 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1962 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1963 sd_event_source_unref(s->notify_event_source);
119e9655 1964 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1965 sd_event_unref(s->event);
d025f1e4 1966
03e334a1
LP
1967 safe_close(s->syslog_fd);
1968 safe_close(s->native_fd);
1969 safe_close(s->stdout_fd);
1970 safe_close(s->dev_kmsg_fd);
875c2e22 1971 safe_close(s->audit_fd);
03e334a1 1972 safe_close(s->hostname_fd);
e22aa3d3 1973 safe_close(s->notify_fd);
0c24bb23 1974
d025f1e4
ZJS
1975 if (s->rate_limit)
1976 journal_rate_limit_free(s->rate_limit);
1977
1978 if (s->kernel_seqnum)
1979 munmap(s->kernel_seqnum, sizeof(uint64_t));
1980
1981 free(s->buffer);
1982 free(s->tty_path);
e9174f29 1983 free(s->cgroup_root);
99d0966e 1984 free(s->hostname_field);
d025f1e4
ZJS
1985
1986 if (s->mmap)
1987 mmap_cache_unref(s->mmap);
1988
3e044c49 1989 udev_unref(s->udev);
d025f1e4 1990}
8580d1f7
LP
1991
1992static const char* const storage_table[_STORAGE_MAX] = {
1993 [STORAGE_AUTO] = "auto",
1994 [STORAGE_VOLATILE] = "volatile",
1995 [STORAGE_PERSISTENT] = "persistent",
1996 [STORAGE_NONE] = "none"
1997};
1998
1999DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2000DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2001
2002static const char* const split_mode_table[_SPLIT_MAX] = {
2003 [SPLIT_LOGIN] = "login",
2004 [SPLIT_UID] = "uid",
2005 [SPLIT_NONE] = "none",
2006};
2007
2008DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2009DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");