]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
core: use an AF_UNIX/SOCK_DGRAM socket for cgroup agent notification
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
24882e06
LP
20#ifdef HAVE_SELINUX
21#include <selinux/selinux.h>
22#endif
8580d1f7
LP
23#include <sys/ioctl.h>
24#include <sys/mman.h>
25#include <sys/signalfd.h>
26#include <sys/statvfs.h>
07630cea 27#include <linux/sockios.h>
24882e06 28
b4bbcaa9 29#include "libudev.h"
8580d1f7 30#include "sd-daemon.h"
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
8580d1f7
LP
33
34#include "acl-util.h"
b5efdb8a 35#include "alloc-util.h"
430f0182 36#include "audit-util.h"
d025f1e4 37#include "cgroup-util.h"
d025f1e4 38#include "conf-parser.h"
a0956174 39#include "dirent-util.h"
0dec689b 40#include "extract-word.h"
3ffd4af2 41#include "fd-util.h"
33d52ab9 42#include "fileio.h"
958b66ea 43#include "formats-util.h"
f4f15635 44#include "fs-util.h"
8580d1f7 45#include "hashmap.h"
958b66ea 46#include "hostname-util.h"
afc5dbf3 47#include "io-util.h"
8580d1f7
LP
48#include "journal-authenticate.h"
49#include "journal-file.h"
d025f1e4
ZJS
50#include "journal-internal.h"
51#include "journal-vacuum.h"
8580d1f7 52#include "journald-audit.h"
d025f1e4 53#include "journald-kmsg.h"
d025f1e4 54#include "journald-native.h"
8580d1f7 55#include "journald-rate-limit.h"
3ffd4af2 56#include "journald-server.h"
8580d1f7
LP
57#include "journald-stream.h"
58#include "journald-syslog.h"
07630cea
LP
59#include "missing.h"
60#include "mkdir.h"
6bedfcbb 61#include "parse-util.h"
4e731273 62#include "proc-cmdline.h"
07630cea
LP
63#include "process-util.h"
64#include "rm-rf.h"
65#include "selinux-util.h"
66#include "signal-util.h"
67#include "socket-util.h"
32917e33 68#include "stdio-util.h"
8b43440b 69#include "string-table.h"
07630cea 70#include "string-util.h"
4a0b58c4 71#include "user-util.h"
8a03c9ef 72#include "log.h"
d025f1e4 73
d025f1e4
ZJS
74#define USER_JOURNALS_MAX 1024
75
26687bf8 76#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
77#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
78#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 79#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4 80
8580d1f7 81#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
d025f1e4 82
e22aa3d3
LP
83#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
84
7a24f3bf
VC
85/* The period to insert between posting changes for coalescing */
86#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
87
8580d1f7
LP
88static int determine_space_for(
89 Server *s,
90 JournalMetrics *metrics,
91 const char *path,
92 const char *name,
93 bool verbose,
94 bool patch_min_use,
95 uint64_t *available,
96 uint64_t *limit) {
97
98 uint64_t sum = 0, ss_avail, avail;
7fd1b19b 99 _cleanup_closedir_ DIR *d = NULL;
8580d1f7
LP
100 struct dirent *de;
101 struct statvfs ss;
102 const char *p;
d025f1e4 103 usec_t ts;
d025f1e4 104
8580d1f7
LP
105 assert(s);
106 assert(metrics);
107 assert(path);
108 assert(name);
d025f1e4 109
8580d1f7 110 ts = now(CLOCK_MONOTONIC);
d025f1e4 111
8580d1f7 112 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
d025f1e4 113
8580d1f7
LP
114 if (available)
115 *available = s->cached_space_available;
116 if (limit)
117 *limit = s->cached_space_limit;
d025f1e4 118
d025f1e4 119 return 0;
8580d1f7 120 }
d025f1e4 121
8580d1f7 122 p = strjoina(path, SERVER_MACHINE_ID(s));
d025f1e4 123 d = opendir(p);
d025f1e4 124 if (!d)
8580d1f7 125 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
d025f1e4
ZJS
126
127 if (fstatvfs(dirfd(d), &ss) < 0)
8580d1f7 128 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
d025f1e4 129
8580d1f7 130 FOREACH_DIRENT_ALL(de, d, break) {
d025f1e4 131 struct stat st;
d025f1e4
ZJS
132
133 if (!endswith(de->d_name, ".journal") &&
134 !endswith(de->d_name, ".journal~"))
135 continue;
136
8580d1f7
LP
137 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
138 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
d025f1e4 139 continue;
8580d1f7 140 }
d025f1e4
ZJS
141
142 if (!S_ISREG(st.st_mode))
143 continue;
144
145 sum += (uint64_t) st.st_blocks * 512UL;
146 }
147
8a03c9ef 148 /* If requested, then let's bump the min_use limit to the
8580d1f7
LP
149 * current usage on disk. We do this when starting up and
150 * first opening the journal files. This way sudden spikes in
151 * disk usage will not cause journald to vacuum files without
152 * bounds. Note that this means that only a restart of
153 * journald will make it reset this value. */
d025f1e4 154
8580d1f7
LP
155 if (patch_min_use)
156 metrics->min_use = MAX(metrics->min_use, sum);
348ced90 157
8580d1f7
LP
158 ss_avail = ss.f_bsize * ss.f_bavail;
159 avail = LESS_BY(ss_avail, metrics->keep_free);
348ced90 160
8580d1f7
LP
161 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
162 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
163 s->cached_space_timestamp = ts;
d025f1e4 164
670b110c
ZJS
165 if (verbose) {
166 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
8580d1f7 167 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
282c5c4e
ZJS
168 format_bytes(fb1, sizeof(fb1), sum);
169 format_bytes(fb2, sizeof(fb2), metrics->max_use);
170 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
171 format_bytes(fb4, sizeof(fb4), ss_avail);
172 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
173 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
670b110c
ZJS
174
175 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
282c5c4e
ZJS
176 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
177 name, path, fb1, fb5, fb6),
178 "JOURNAL_NAME=%s", name,
179 "JOURNAL_PATH=%s", path,
180 "CURRENT_USE=%"PRIu64, sum,
181 "CURRENT_USE_PRETTY=%s", fb1,
182 "MAX_USE=%"PRIu64, metrics->max_use,
183 "MAX_USE_PRETTY=%s", fb2,
184 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
185 "DISK_KEEP_FREE_PRETTY=%s", fb3,
186 "DISK_AVAILABLE=%"PRIu64, ss_avail,
187 "DISK_AVAILABLE_PRETTY=%s", fb4,
188 "LIMIT=%"PRIu64, s->cached_space_limit,
189 "LIMIT_PRETTY=%s", fb5,
190 "AVAILABLE=%"PRIu64, s->cached_space_available,
191 "AVAILABLE_PRETTY=%s", fb6,
8a03c9ef 192 NULL);
8580d1f7
LP
193 }
194
195 if (available)
196 *available = s->cached_space_available;
197 if (limit)
198 *limit = s->cached_space_limit;
199
200 return 1;
201}
202
203static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
204 JournalMetrics *metrics;
205 const char *path, *name;
206
207 assert(s);
208
209 if (s->system_journal) {
210 path = "/var/log/journal/";
211 metrics = &s->system_metrics;
212 name = "System journal";
213 } else {
214 path = "/run/log/journal/";
215 metrics = &s->runtime_metrics;
216 name = "Runtime journal";
670b110c
ZJS
217 }
218
8580d1f7 219 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
d025f1e4
ZJS
220}
221
5c3bde3f 222static void server_add_acls(JournalFile *f, uid_t uid) {
d025f1e4 223#ifdef HAVE_ACL
5c3bde3f 224 int r;
d025f1e4 225#endif
d025f1e4
ZJS
226 assert(f);
227
d025f1e4 228#ifdef HAVE_ACL
34c10968 229 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
230 return;
231
5c3bde3f
ZJS
232 r = add_acls_for_user(f->fd, uid);
233 if (r < 0)
234 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
235#endif
236}
237
7a24f3bf
VC
238static int open_journal(
239 Server *s,
240 bool reliably,
241 const char *fname,
242 int flags,
243 bool seal,
244 JournalMetrics *metrics,
7a24f3bf
VC
245 JournalFile **ret) {
246 int r;
e167d7fd 247 JournalFile *f;
7a24f3bf
VC
248
249 assert(s);
250 assert(fname);
251 assert(ret);
252
253 if (reliably)
b58c888f 254 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf 255 else
5d1ce257 256 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
7a24f3bf
VC
257 if (r < 0)
258 return r;
259
e167d7fd 260 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
7a24f3bf 261 if (r < 0) {
69a3a6fd 262 (void) journal_file_close(f);
7a24f3bf
VC
263 return r;
264 }
265
e167d7fd 266 *ret = f;
7a24f3bf
VC
267 return r;
268}
269
d025f1e4 270static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 271 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
272 int r;
273 JournalFile *f;
274 sd_id128_t machine;
275
276 assert(s);
277
278 /* We split up user logs only on /var, not on /run. If the
279 * runtime file is open, we write to it exclusively, in order
280 * to guarantee proper order as soon as we flush /run to
281 * /var and close the runtime file. */
282
283 if (s->runtime_journal)
284 return s->runtime_journal;
285
f7dc3ab9 286 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
287 return s->system_journal;
288
289 r = sd_id128_get_machine(&machine);
290 if (r < 0)
291 return s->system_journal;
292
4a0b58c4 293 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
d025f1e4
ZJS
294 if (f)
295 return f;
296
de0671ee
ZJS
297 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
298 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
299 return s->system_journal;
300
43cf8388 301 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 302 /* Too many open? Then let's close one */
43cf8388 303 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4 304 assert(f);
69a3a6fd 305 (void) journal_file_close(f);
d025f1e4
ZJS
306 }
307
089ed40b 308 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
d025f1e4
ZJS
309 if (r < 0)
310 return s->system_journal;
311
5c3bde3f 312 server_add_acls(f, uid);
d025f1e4 313
4a0b58c4 314 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
d025f1e4 315 if (r < 0) {
69a3a6fd 316 (void) journal_file_close(f);
d025f1e4
ZJS
317 return s->system_journal;
318 }
319
320 return f;
321}
322
ea69bd41
LP
323static int do_rotate(
324 Server *s,
325 JournalFile **f,
326 const char* name,
327 bool seal,
328 uint32_t uid) {
329
fc55baee
ZJS
330 int r;
331 assert(s);
332
333 if (!*f)
334 return -EINVAL;
335
b58c888f 336 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
fc55baee
ZJS
337 if (r < 0)
338 if (*f)
ea69bd41 339 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 340 else
ea69bd41 341 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee 342 else
5c3bde3f 343 server_add_acls(*f, uid);
2678031a 344
fc55baee
ZJS
345 return r;
346}
347
d025f1e4
ZJS
348void server_rotate(Server *s) {
349 JournalFile *f;
350 void *k;
351 Iterator i;
352 int r;
353
354 log_debug("Rotating...");
355
8580d1f7
LP
356 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
357 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 358
43cf8388 359 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
4a0b58c4 360 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
fc55baee 361 if (r >= 0)
43cf8388 362 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
363 else if (!f)
364 /* Old file has been closed and deallocated */
43cf8388 365 ordered_hashmap_remove(s->user_journals, k);
d025f1e4 366 }
b58c888f
VC
367
368 /* Perform any deferred closes which aren't still offlining. */
369 SET_FOREACH(f, s->deferred_closes, i)
370 if (!journal_file_is_offlining(f)) {
371 (void) set_remove(s->deferred_closes, f);
372 (void) journal_file_close(f);
373 }
d025f1e4
ZJS
374}
375
26687bf8
OS
376void server_sync(Server *s) {
377 JournalFile *f;
26687bf8
OS
378 Iterator i;
379 int r;
380
26687bf8 381 if (s->system_journal) {
ac2e41f5 382 r = journal_file_set_offline(s->system_journal, false);
26687bf8 383 if (r < 0)
65089b82 384 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
26687bf8
OS
385 }
386
65c1d46b 387 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
ac2e41f5 388 r = journal_file_set_offline(f, false);
26687bf8 389 if (r < 0)
65089b82 390 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
26687bf8
OS
391 }
392
f9a810be
LP
393 if (s->sync_event_source) {
394 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
395 if (r < 0)
da927ba9 396 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 397 }
26687bf8
OS
398
399 s->sync_scheduled = false;
400}
401
ea69bd41
LP
402static void do_vacuum(
403 Server *s,
ea69bd41 404 JournalFile *f,
8580d1f7
LP
405 JournalMetrics *metrics,
406 const char *path,
407 const char *name,
408 bool verbose,
409 bool patch_min_use) {
ea69bd41
LP
410
411 const char *p;
8580d1f7 412 uint64_t limit;
63c8666b
ZJS
413 int r;
414
8580d1f7
LP
415 assert(s);
416 assert(metrics);
417 assert(path);
418 assert(name);
419
63c8666b
ZJS
420 if (!f)
421 return;
422
8580d1f7
LP
423 p = strjoina(path, SERVER_MACHINE_ID(s));
424
425 limit = metrics->max_use;
426 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
427
428 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
63c8666b 429 if (r < 0 && r != -ENOENT)
8580d1f7 430 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
63c8666b
ZJS
431}
432
8580d1f7
LP
433int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
434 assert(s);
d025f1e4
ZJS
435
436 log_debug("Vacuuming...");
437
438 s->oldest_file_usec = 0;
439
8580d1f7
LP
440 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
441 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
d025f1e4 442
8580d1f7
LP
443 s->cached_space_limit = 0;
444 s->cached_space_available = 0;
445 s->cached_space_timestamp = 0;
d025f1e4 446
8580d1f7 447 return 0;
d025f1e4
ZJS
448}
449
0c24bb23
LP
450static void server_cache_machine_id(Server *s) {
451 sd_id128_t id;
452 int r;
453
454 assert(s);
455
456 r = sd_id128_get_machine(&id);
457 if (r < 0)
458 return;
459
460 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
461}
462
463static void server_cache_boot_id(Server *s) {
464 sd_id128_t id;
465 int r;
466
467 assert(s);
468
469 r = sd_id128_get_boot(&id);
470 if (r < 0)
471 return;
472
473 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
474}
475
476static void server_cache_hostname(Server *s) {
477 _cleanup_free_ char *t = NULL;
478 char *x;
479
480 assert(s);
481
482 t = gethostname_malloc();
483 if (!t)
484 return;
485
486 x = strappend("_HOSTNAME=", t);
487 if (!x)
488 return;
489
490 free(s->hostname_field);
491 s->hostname_field = x;
492}
493
8531ae70 494static bool shall_try_append_again(JournalFile *f, int r) {
6e1045e5
ZJS
495 switch(r) {
496 case -E2BIG: /* Hit configured limit */
497 case -EFBIG: /* Hit fs limit */
498 case -EDQUOT: /* Quota limit hit */
499 case -ENOSPC: /* Disk full */
d025f1e4 500 log_debug("%s: Allocation limit reached, rotating.", f->path);
6e1045e5
ZJS
501 return true;
502 case -EIO: /* I/O error of some kind (mmap) */
503 log_warning("%s: IO error, rotating.", f->path);
504 return true;
505 case -EHOSTDOWN: /* Other machine */
d025f1e4 506 log_info("%s: Journal file from other machine, rotating.", f->path);
6e1045e5
ZJS
507 return true;
508 case -EBUSY: /* Unclean shutdown */
d025f1e4 509 log_info("%s: Unclean shutdown, rotating.", f->path);
6e1045e5
ZJS
510 return true;
511 case -EPROTONOSUPPORT: /* Unsupported feature */
d025f1e4 512 log_info("%s: Unsupported feature, rotating.", f->path);
6e1045e5
ZJS
513 return true;
514 case -EBADMSG: /* Corrupted */
515 case -ENODATA: /* Truncated */
516 case -ESHUTDOWN: /* Already archived */
d025f1e4 517 log_warning("%s: Journal file corrupted, rotating.", f->path);
6e1045e5
ZJS
518 return true;
519 case -EIDRM: /* Journal file has been deleted */
2678031a 520 log_warning("%s: Journal file has been deleted, rotating.", f->path);
6e1045e5
ZJS
521 return true;
522 default:
d025f1e4 523 return false;
6e1045e5 524 }
d025f1e4
ZJS
525}
526
d07f7b9e 527static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
528 JournalFile *f;
529 bool vacuumed = false;
530 int r;
531
532 assert(s);
533 assert(iovec);
534 assert(n > 0);
535
536 f = find_journal(s, uid);
537 if (!f)
538 return;
539
540 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
541 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
542 server_rotate(s);
8580d1f7 543 server_vacuum(s, false, false);
d025f1e4
ZJS
544 vacuumed = true;
545
546 f = find_journal(s, uid);
547 if (!f)
548 return;
549 }
550
551 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 552 if (r >= 0) {
d07f7b9e 553 server_schedule_sync(s, priority);
d025f1e4 554 return;
26687bf8 555 }
d025f1e4
ZJS
556
557 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 558 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
559 return;
560 }
561
562 server_rotate(s);
8580d1f7 563 server_vacuum(s, false, false);
d025f1e4
ZJS
564
565 f = find_journal(s, uid);
566 if (!f)
567 return;
568
569 log_debug("Retrying write.");
570 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
571 if (r < 0)
572 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
573 else
d07f7b9e 574 server_schedule_sync(s, priority);
d025f1e4
ZJS
575}
576
577static void dispatch_message_real(
578 Server *s,
579 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
580 const struct ucred *ucred,
581 const struct timeval *tv,
d025f1e4 582 const char *label, size_t label_len,
968f3196 583 const char *unit_id,
d07f7b9e 584 int priority,
968f3196 585 pid_t object_pid) {
d025f1e4 586
968f3196 587 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
588 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
589 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
590 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 591 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
592 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
593 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
594 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
595 uid_t object_uid;
596 gid_t object_gid;
968f3196 597 char *x;
d025f1e4 598 int r;
ae018d9b 599 char *t, *c;
82499507
LP
600 uid_t realuid = 0, owner = 0, journal_uid;
601 bool owner_valid = false;
ae018d9b 602#ifdef HAVE_AUDIT
968f3196
ZJS
603 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
604 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
605 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
606 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
607
608 uint32_t audit;
609 uid_t loginuid;
610#endif
d025f1e4
ZJS
611
612 assert(s);
613 assert(iovec);
614 assert(n > 0);
968f3196 615 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
616
617 if (ucred) {
d025f1e4
ZJS
618 realuid = ucred->uid;
619
de0671ee 620 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 621 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 622
de0671ee 623 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 624 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 625
de0671ee 626 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 627 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
628
629 r = get_process_comm(ucred->pid, &t);
630 if (r >= 0) {
63c372cb 631 x = strjoina("_COMM=", t);
d025f1e4 632 free(t);
968f3196 633 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
634 }
635
636 r = get_process_exe(ucred->pid, &t);
637 if (r >= 0) {
63c372cb 638 x = strjoina("_EXE=", t);
d025f1e4 639 free(t);
968f3196 640 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
641 }
642
9bdbc2e2 643 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 644 if (r >= 0) {
63c372cb 645 x = strjoina("_CMDLINE=", t);
d025f1e4 646 free(t);
3a832116
SL
647 IOVEC_SET_STRING(iovec[n++], x);
648 }
649
650 r = get_process_capeff(ucred->pid, &t);
651 if (r >= 0) {
63c372cb 652 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 653 free(t);
968f3196 654 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
655 }
656
0a20e3c1 657#ifdef HAVE_AUDIT
d025f1e4 658 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 659 if (r >= 0) {
de0671ee 660 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
661 IOVEC_SET_STRING(iovec[n++], audit_session);
662 }
d025f1e4
ZJS
663
664 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 665 if (r >= 0) {
de0671ee 666 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 667 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 668 }
ae018d9b 669#endif
d025f1e4 670
e9174f29 671 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 672 if (r >= 0) {
968f3196
ZJS
673 char *session = NULL;
674
63c372cb 675 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 676 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 677
ae018d9b
LP
678 r = cg_path_get_session(c, &t);
679 if (r >= 0) {
63c372cb 680 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 681 free(t);
d025f1e4 682 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
683 }
684
685 if (cg_path_get_owner_uid(c, &owner) >= 0) {
686 owner_valid = true;
d025f1e4 687
de0671ee 688 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 689 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 690 }
d025f1e4 691
ae018d9b 692 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 693 x = strjoina("_SYSTEMD_UNIT=", t);
ae018d9b 694 free(t);
19cace37
LP
695 IOVEC_SET_STRING(iovec[n++], x);
696 } else if (unit_id && !session) {
63c372cb 697 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
698 IOVEC_SET_STRING(iovec[n++], x);
699 }
700
701 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 702 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 703 free(t);
968f3196 704 IOVEC_SET_STRING(iovec[n++], x);
19cace37 705 } else if (unit_id && session) {
63c372cb 706 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
707 IOVEC_SET_STRING(iovec[n++], x);
708 }
ae018d9b 709
0a244b8e 710 if (cg_path_get_slice(c, &t) >= 0) {
63c372cb 711 x = strjoina("_SYSTEMD_SLICE=", t);
0a244b8e
LP
712 free(t);
713 IOVEC_SET_STRING(iovec[n++], x);
714 }
715
ae018d9b 716 free(c);
2d43b190 717 } else if (unit_id) {
63c372cb 718 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 719 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 720 }
d025f1e4 721
d025f1e4 722#ifdef HAVE_SELINUX
6355e756 723 if (mac_selinux_have()) {
d682b3a7 724 if (label) {
f8294e41 725 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 726
d682b3a7
LP
727 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
728 IOVEC_SET_STRING(iovec[n++], x);
729 } else {
730 security_context_t con;
d025f1e4 731
d682b3a7 732 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 733 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 734
d682b3a7
LP
735 freecon(con);
736 IOVEC_SET_STRING(iovec[n++], x);
737 }
d025f1e4
ZJS
738 }
739 }
740#endif
741 }
968f3196
ZJS
742 assert(n <= m);
743
744 if (object_pid) {
745 r = get_process_uid(object_pid, &object_uid);
746 if (r >= 0) {
de0671ee 747 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
748 IOVEC_SET_STRING(iovec[n++], o_uid);
749 }
750
751 r = get_process_gid(object_pid, &object_gid);
752 if (r >= 0) {
de0671ee 753 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
754 IOVEC_SET_STRING(iovec[n++], o_gid);
755 }
756
757 r = get_process_comm(object_pid, &t);
758 if (r >= 0) {
63c372cb 759 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
760 free(t);
761 IOVEC_SET_STRING(iovec[n++], x);
762 }
763
764 r = get_process_exe(object_pid, &t);
765 if (r >= 0) {
63c372cb 766 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
767 free(t);
768 IOVEC_SET_STRING(iovec[n++], x);
769 }
770
771 r = get_process_cmdline(object_pid, 0, false, &t);
772 if (r >= 0) {
63c372cb 773 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
774 free(t);
775 IOVEC_SET_STRING(iovec[n++], x);
776 }
777
778#ifdef HAVE_AUDIT
779 r = audit_session_from_pid(object_pid, &audit);
780 if (r >= 0) {
de0671ee 781 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
782 IOVEC_SET_STRING(iovec[n++], o_audit_session);
783 }
784
785 r = audit_loginuid_from_pid(object_pid, &loginuid);
786 if (r >= 0) {
de0671ee 787 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
788 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
789 }
790#endif
791
e9174f29 792 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 793 if (r >= 0) {
63c372cb 794 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
795 IOVEC_SET_STRING(iovec[n++], x);
796
797 r = cg_path_get_session(c, &t);
798 if (r >= 0) {
63c372cb 799 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
800 free(t);
801 IOVEC_SET_STRING(iovec[n++], x);
802 }
803
804 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 805 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
806 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
807 }
808
809 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 810 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 811 free(t);
19cace37
LP
812 IOVEC_SET_STRING(iovec[n++], x);
813 }
814
815 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 816 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 817 free(t);
968f3196 818 IOVEC_SET_STRING(iovec[n++], x);
19cace37 819 }
968f3196
ZJS
820
821 free(c);
822 }
823 }
824 assert(n <= m);
d025f1e4
ZJS
825
826 if (tv) {
ae018d9b 827 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 828 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
829 }
830
831 /* Note that strictly speaking storing the boot id here is
832 * redundant since the entry includes this in-line
833 * anyway. However, we need this indexed, too. */
0c24bb23
LP
834 if (!isempty(s->boot_id_field))
835 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 836
0c24bb23
LP
837 if (!isempty(s->machine_id_field))
838 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 839
0c24bb23
LP
840 if (!isempty(s->hostname_field))
841 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
842
843 assert(n <= m);
844
da499392 845 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 846 /* Split up strictly by any UID */
759c945a 847 journal_uid = realuid;
82499507 848 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
849 /* Split up by login UIDs. We do this only if the
850 * realuid is not root, in order not to accidentally
851 * leak privileged information to the user that is
852 * logged by a privileged process that is part of an
7517e174 853 * unprivileged session. */
8a0889df 854 journal_uid = owner;
da499392
KS
855 else
856 journal_uid = 0;
759c945a 857
d07f7b9e 858 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
859}
860
861void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
862 char mid[11 + 32 + 1];
8a03c9ef
ZJS
863 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
864 unsigned n = 0, m;
32917e33 865 int r;
d025f1e4 866 va_list ap;
b92bea5d 867 struct ucred ucred = {};
d025f1e4
ZJS
868
869 assert(s);
870 assert(format);
871
4850d39a 872 assert_cc(3 == LOG_FAC(LOG_DAEMON));
b6fa2555
EV
873 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
874 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
875
d025f1e4 876 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
4850d39a 877 assert_cc(6 == LOG_INFO);
32917e33 878 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
d025f1e4
ZJS
879
880 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
e2cc6eca 881 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
882 IOVEC_SET_STRING(iovec[n++], mid);
883 }
884
8a03c9ef
ZJS
885 m = n;
886
887 va_start(ap, format);
32917e33
ZJS
888 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
889 /* Error handling below */
8a03c9ef
ZJS
890 va_end(ap);
891
d025f1e4
ZJS
892 ucred.pid = getpid();
893 ucred.uid = getuid();
894 ucred.gid = getgid();
895
32917e33
ZJS
896 if (r >= 0)
897 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
8a03c9ef
ZJS
898
899 while (m < n)
900 free(iovec[m++].iov_base);
32917e33
ZJS
901
902 if (r < 0) {
903 /* We failed to format the message. Emit a warning instead. */
904 char buf[LINE_MAX];
905
906 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
907
908 n = 3;
909 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
910 IOVEC_SET_STRING(iovec[n++], buf);
911 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
912 }
d025f1e4
ZJS
913}
914
915void server_dispatch_message(
916 Server *s,
917 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
918 const struct ucred *ucred,
919 const struct timeval *tv,
d025f1e4
ZJS
920 const char *label, size_t label_len,
921 const char *unit_id,
968f3196
ZJS
922 int priority,
923 pid_t object_pid) {
d025f1e4 924
7027ff61 925 int rl, r;
7fd1b19b 926 _cleanup_free_ char *path = NULL;
8580d1f7 927 uint64_t available = 0;
db91ea32 928 char *c;
d025f1e4
ZJS
929
930 assert(s);
931 assert(iovec || n == 0);
932
933 if (n == 0)
934 return;
935
936 if (LOG_PRI(priority) > s->max_level_store)
937 return;
938
2f5df74a
HHPF
939 /* Stop early in case the information will not be stored
940 * in a journal. */
941 if (s->storage == STORAGE_NONE)
942 return;
943
d025f1e4
ZJS
944 if (!ucred)
945 goto finish;
946
e9174f29 947 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 948 if (r < 0)
d025f1e4
ZJS
949 goto finish;
950
951 /* example: /user/lennart/3/foobar
952 * /system/dbus.service/foobar
953 *
954 * So let's cut of everything past the third /, since that is
955 * where user directories start */
956
957 c = strchr(path, '/');
958 if (c) {
959 c = strchr(c+1, '/');
960 if (c) {
961 c = strchr(c+1, '/');
962 if (c)
963 *c = 0;
964 }
965 }
966
8580d1f7
LP
967 (void) determine_space(s, false, false, &available, NULL);
968 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
db91ea32 969 if (rl == 0)
d025f1e4 970 return;
d025f1e4
ZJS
971
972 /* Write a suppression message if we suppressed something */
973 if (rl > 1)
db91ea32 974 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
8a03c9ef
ZJS
975 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
976 NULL);
d025f1e4
ZJS
977
978finish:
d07f7b9e 979 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
980}
981
982
caa2f4c0 983static int system_journal_open(Server *s, bool flush_requested) {
84267e40 984 const char *fn;
09eba4d4 985 int r = 0;
d025f1e4
ZJS
986
987 if (!s->system_journal &&
988 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
caa2f4c0
ZJS
989 (flush_requested
990 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
d025f1e4
ZJS
991
992 /* If in auto mode: first try to create the machine
993 * path, but not the prefix.
994 *
995 * If in persistent mode: create /var/log/journal and
996 * the machine path */
997
998 if (s->storage == STORAGE_PERSISTENT)
ac892057 999 (void) mkdir_p("/var/log/journal/", 0755);
d025f1e4 1000
8580d1f7 1001 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
d025f1e4 1002 (void) mkdir(fn, 0755);
d025f1e4 1003
63c372cb 1004 fn = strjoina(fn, "/system.journal");
089ed40b 1005 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
8580d1f7 1006 if (r >= 0) {
5c3bde3f 1007 server_add_acls(s->system_journal, 0);
8580d1f7
LP
1008 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
1009 } else if (r < 0) {
433dd100 1010 if (r != -ENOENT && r != -EROFS)
da927ba9 1011 log_warning_errno(r, "Failed to open system journal: %m");
e40ec7ae 1012
433dd100
LN
1013 r = 0;
1014 }
d025f1e4
ZJS
1015 }
1016
1017 if (!s->runtime_journal &&
1018 (s->storage != STORAGE_NONE)) {
1019
8580d1f7 1020 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
d025f1e4
ZJS
1021
1022 if (s->system_journal) {
1023
1024 /* Try to open the runtime journal, but only
1025 * if it already exists, so that we can flush
1026 * it into the system journal */
1027
089ed40b 1028 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
d025f1e4
ZJS
1029 if (r < 0) {
1030 if (r != -ENOENT)
da927ba9 1031 log_warning_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1032
1033 r = 0;
1034 }
1035
1036 } else {
1037
1038 /* OK, we really need the runtime journal, so create
1039 * it if necessary. */
1040
fc1d70af
LP
1041 (void) mkdir("/run/log", 0755);
1042 (void) mkdir("/run/log/journal", 0755);
1043 (void) mkdir_parents(fn, 0750);
1044
089ed40b 1045 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
23bbb0de
MS
1046 if (r < 0)
1047 return log_error_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1048 }
1049
8580d1f7 1050 if (s->runtime_journal) {
5c3bde3f 1051 server_add_acls(s->runtime_journal, 0);
8580d1f7
LP
1052 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1053 }
d025f1e4
ZJS
1054 }
1055
1056 return r;
1057}
1058
1059int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1060 sd_id128_t machine;
1061 sd_journal *j = NULL;
fbb63411
LP
1062 char ts[FORMAT_TIMESPAN_MAX];
1063 usec_t start;
1064 unsigned n = 0;
1065 int r;
d025f1e4
ZJS
1066
1067 assert(s);
1068
1069 if (s->storage != STORAGE_AUTO &&
1070 s->storage != STORAGE_PERSISTENT)
1071 return 0;
1072
1073 if (!s->runtime_journal)
1074 return 0;
1075
8580d1f7 1076 (void) system_journal_open(s, true);
d025f1e4
ZJS
1077
1078 if (!s->system_journal)
1079 return 0;
1080
1081 log_debug("Flushing to /var...");
1082
fbb63411
LP
1083 start = now(CLOCK_MONOTONIC);
1084
d025f1e4 1085 r = sd_id128_get_machine(&machine);
00a16861 1086 if (r < 0)
d025f1e4 1087 return r;
d025f1e4
ZJS
1088
1089 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1090 if (r < 0)
1091 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1092
93b73b06
LP
1093 sd_journal_set_data_threshold(j, 0);
1094
d025f1e4
ZJS
1095 SD_JOURNAL_FOREACH(j) {
1096 Object *o = NULL;
1097 JournalFile *f;
1098
1099 f = j->current_file;
1100 assert(f && f->current_offset > 0);
1101
fbb63411
LP
1102 n++;
1103
d025f1e4
ZJS
1104 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1105 if (r < 0) {
da927ba9 1106 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1107 goto finish;
1108 }
1109
1110 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1111 if (r >= 0)
1112 continue;
1113
1114 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1115 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1116 goto finish;
1117 }
1118
1119 server_rotate(s);
8580d1f7 1120 server_vacuum(s, false, false);
d025f1e4 1121
253f59df
LP
1122 if (!s->system_journal) {
1123 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1124 r = -EIO;
1125 goto finish;
1126 }
1127
d025f1e4
ZJS
1128 log_debug("Retrying write.");
1129 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1130 if (r < 0) {
da927ba9 1131 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1132 goto finish;
1133 }
1134 }
1135
804ae586
LP
1136 r = 0;
1137
d025f1e4
ZJS
1138finish:
1139 journal_file_post_change(s->system_journal);
1140
804ae586 1141 s->runtime_journal = journal_file_close(s->runtime_journal);
d025f1e4
ZJS
1142
1143 if (r >= 0)
c6878637 1144 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1145
763c7aa2 1146 sd_journal_close(j);
d025f1e4 1147
8a03c9ef
ZJS
1148 server_driver_message(s, SD_ID128_NULL,
1149 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1150 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1151 n),
1152 NULL);
fbb63411 1153
d025f1e4
ZJS
1154 return r;
1155}
1156
8531ae70 1157int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be 1158 Server *s = userdata;
a315ac4e
LP
1159 struct ucred *ucred = NULL;
1160 struct timeval *tv = NULL;
1161 struct cmsghdr *cmsg;
1162 char *label = NULL;
1163 size_t label_len = 0, m;
1164 struct iovec iovec;
1165 ssize_t n;
1166 int *fds = NULL, v = 0;
1167 unsigned n_fds = 0;
1168
1169 union {
1170 struct cmsghdr cmsghdr;
1171
1172 /* We use NAME_MAX space for the SELinux label
1173 * here. The kernel currently enforces no
1174 * limit, but according to suggestions from
1175 * the SELinux people this will change and it
1176 * will probably be identical to NAME_MAX. For
1177 * now we use that, but this should be updated
1178 * one day when the final limit is known. */
1179 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1180 CMSG_SPACE(sizeof(struct timeval)) +
1181 CMSG_SPACE(sizeof(int)) + /* fd */
1182 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1183 } control = {};
1184
1185 union sockaddr_union sa = {};
1186
1187 struct msghdr msghdr = {
1188 .msg_iov = &iovec,
1189 .msg_iovlen = 1,
1190 .msg_control = &control,
1191 .msg_controllen = sizeof(control),
1192 .msg_name = &sa,
1193 .msg_namelen = sizeof(sa),
1194 };
f9a810be 1195
d025f1e4 1196 assert(s);
875c2e22 1197 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1198
1199 if (revents != EPOLLIN) {
1200 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1201 return -EIO;
1202 }
1203
a315ac4e
LP
1204 /* Try to get the right size, if we can. (Not all
1205 * sockets support SIOCINQ, hence we just try, but
1206 * don't rely on it. */
1207 (void) ioctl(fd, SIOCINQ, &v);
d025f1e4 1208
a315ac4e
LP
1209 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1210 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1211 (size_t) LINE_MAX,
1212 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1213
a315ac4e
LP
1214 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1215 return log_oom();
875c2e22 1216
a315ac4e
LP
1217 iovec.iov_base = s->buffer;
1218 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1219
a315ac4e
LP
1220 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1221 if (n < 0) {
1222 if (errno == EINTR || errno == EAGAIN)
1223 return 0;
875c2e22 1224
a315ac4e
LP
1225 return log_error_errno(errno, "recvmsg() failed: %m");
1226 }
875c2e22 1227
a315ac4e
LP
1228 CMSG_FOREACH(cmsg, &msghdr) {
1229
1230 if (cmsg->cmsg_level == SOL_SOCKET &&
1231 cmsg->cmsg_type == SCM_CREDENTIALS &&
1232 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1233 ucred = (struct ucred*) CMSG_DATA(cmsg);
1234 else if (cmsg->cmsg_level == SOL_SOCKET &&
1235 cmsg->cmsg_type == SCM_SECURITY) {
1236 label = (char*) CMSG_DATA(cmsg);
1237 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1238 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1239 cmsg->cmsg_type == SO_TIMESTAMP &&
1240 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1241 tv = (struct timeval*) CMSG_DATA(cmsg);
1242 else if (cmsg->cmsg_level == SOL_SOCKET &&
1243 cmsg->cmsg_type == SCM_RIGHTS) {
1244 fds = (int*) CMSG_DATA(cmsg);
1245 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
d025f1e4 1246 }
a315ac4e 1247 }
d025f1e4 1248
a315ac4e
LP
1249 /* And a trailing NUL, just in case */
1250 s->buffer[n] = 0;
1251
1252 if (fd == s->syslog_fd) {
1253 if (n > 0 && n_fds == 0)
1254 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1255 else if (n_fds > 0)
1256 log_warning("Got file descriptors via syslog socket. Ignoring.");
1257
1258 } else if (fd == s->native_fd) {
1259 if (n > 0 && n_fds == 0)
1260 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1261 else if (n == 0 && n_fds == 1)
1262 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1263 else if (n_fds > 0)
1264 log_warning("Got too many file descriptors via native socket. Ignoring.");
1265
1266 } else {
1267 assert(fd == s->audit_fd);
1268
1269 if (n > 0 && n_fds == 0)
1270 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1271 else if (n_fds > 0)
1272 log_warning("Got file descriptors via audit socket. Ignoring.");
f9a810be 1273 }
a315ac4e
LP
1274
1275 close_many(fds, n_fds);
1276 return 0;
f9a810be 1277}
d025f1e4 1278
f9a810be
LP
1279static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1280 Server *s = userdata;
33d52ab9 1281 int r;
d025f1e4 1282
f9a810be 1283 assert(s);
d025f1e4 1284
94b65516 1285 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
d025f1e4 1286
f9a810be
LP
1287 server_flush_to_var(s);
1288 server_sync(s);
8580d1f7 1289 server_vacuum(s, false, false);
d025f1e4 1290
33d52ab9
LP
1291 r = touch("/run/systemd/journal/flushed");
1292 if (r < 0)
1293 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
74055aa7 1294
f9a810be
LP
1295 return 0;
1296}
d025f1e4 1297
f9a810be
LP
1298static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1299 Server *s = userdata;
33d52ab9 1300 int r;
d025f1e4 1301
f9a810be 1302 assert(s);
d025f1e4 1303
94b65516 1304 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
f9a810be 1305 server_rotate(s);
8580d1f7 1306 server_vacuum(s, true, true);
d025f1e4 1307
dbd6e31c 1308 /* Let clients know when the most recent rotation happened. */
33d52ab9
LP
1309 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1310 if (r < 0)
1311 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
dbd6e31c 1312
f9a810be
LP
1313 return 0;
1314}
d025f1e4 1315
f9a810be
LP
1316static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1317 Server *s = userdata;
d025f1e4 1318
f9a810be 1319 assert(s);
d025f1e4 1320
4daf54a8 1321 log_received_signal(LOG_INFO, si);
d025f1e4 1322
6203e07a 1323 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1324 return 0;
1325}
1326
94b65516
LP
1327static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1328 Server *s = userdata;
33d52ab9 1329 int r;
94b65516
LP
1330
1331 assert(s);
1332
1333 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1334
1335 server_sync(s);
1336
1337 /* Let clients know when the most recent sync happened. */
33d52ab9
LP
1338 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1339 if (r < 0)
1340 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
94b65516
LP
1341
1342 return 0;
1343}
1344
f9a810be 1345static int setup_signals(Server *s) {
f9a810be 1346 int r;
d025f1e4
ZJS
1347
1348 assert(s);
1349
94b65516 1350 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
d025f1e4 1351
151b9b96 1352 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1353 if (r < 0)
1354 return r;
1355
151b9b96 1356 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1357 if (r < 0)
1358 return r;
d025f1e4 1359
151b9b96 1360 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1361 if (r < 0)
1362 return r;
d025f1e4 1363
b374689c
LP
1364 /* Let's process SIGTERM late, so that we flush all queued
1365 * messages to disk before we exit */
1366 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1367 if (r < 0)
1368 return r;
1369
1370 /* When journald is invoked on the terminal (when debugging),
1371 * it's useful if C-c is handled equivalent to SIGTERM. */
151b9b96 1372 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1373 if (r < 0)
1374 return r;
d025f1e4 1375
b374689c
LP
1376 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1377 if (r < 0)
1378 return r;
1379
94b65516
LP
1380 /* SIGRTMIN+1 causes an immediate sync. We process this very
1381 * late, so that everything else queued at this point is
1382 * really written to disk. Clients can watch
1383 * /run/systemd/journal/synced with inotify until its mtime
1384 * changes to see when a sync happened. */
1385 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1386 if (r < 0)
1387 return r;
1388
1389 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1390 if (r < 0)
1391 return r;
1392
d025f1e4
ZJS
1393 return 0;
1394}
1395
1396static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1397 _cleanup_free_ char *line = NULL;
d581d9d9 1398 const char *p;
74df0fca 1399 int r;
d025f1e4 1400
74df0fca 1401 r = proc_cmdline(&line);
b5884878 1402 if (r < 0) {
da927ba9 1403 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1404 return 0;
b5884878 1405 }
d025f1e4 1406
d581d9d9 1407 p = line;
9ed794a3 1408 for (;;) {
ff82c36c 1409 _cleanup_free_ char *word = NULL;
d025f1e4 1410
d581d9d9
SS
1411 r = extract_first_word(&p, &word, NULL, 0);
1412 if (r < 0)
1413 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1414
1415 if (r == 0)
1416 break;
d025f1e4
ZJS
1417
1418 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1419 r = parse_boolean(word + 35);
1420 if (r < 0)
1421 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1422 else
1423 s->forward_to_syslog = r;
1424 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1425 r = parse_boolean(word + 33);
1426 if (r < 0)
1427 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1428 else
1429 s->forward_to_kmsg = r;
1430 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1431 r = parse_boolean(word + 36);
1432 if (r < 0)
1433 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1434 else
1435 s->forward_to_console = r;
40b71e89
ST
1436 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1437 r = parse_boolean(word + 33);
1438 if (r < 0)
1439 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1440 else
1441 s->forward_to_wall = r;
d025f1e4
ZJS
1442 } else if (startswith(word, "systemd.journald"))
1443 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1444 }
1445
804ae586 1446 /* do not warn about state here, since probably systemd already did */
db91ea32 1447 return 0;
d025f1e4
ZJS
1448}
1449
1450static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1451 assert(s);
1452
75eb6154
LP
1453 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1454 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
a9edaeff
JT
1455 "Journal\0",
1456 config_item_perf_lookup, journald_gperf_lookup,
1457 false, s);
d025f1e4
ZJS
1458}
1459
f9a810be
LP
1460static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1461 Server *s = userdata;
26687bf8
OS
1462
1463 assert(s);
1464
f9a810be 1465 server_sync(s);
26687bf8
OS
1466 return 0;
1467}
1468
d07f7b9e 1469int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1470 int r;
1471
26687bf8
OS
1472 assert(s);
1473
d07f7b9e
LP
1474 if (priority <= LOG_CRIT) {
1475 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1476 server_sync(s);
1477 return 0;
1478 }
1479
26687bf8
OS
1480 if (s->sync_scheduled)
1481 return 0;
1482
f9a810be
LP
1483 if (s->sync_interval_usec > 0) {
1484 usec_t when;
ca267016 1485
6a0f1f6d 1486 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1487 if (r < 0)
1488 return r;
26687bf8 1489
f9a810be
LP
1490 when += s->sync_interval_usec;
1491
1492 if (!s->sync_event_source) {
6a0f1f6d
LP
1493 r = sd_event_add_time(
1494 s->event,
1495 &s->sync_event_source,
1496 CLOCK_MONOTONIC,
1497 when, 0,
1498 server_dispatch_sync, s);
f9a810be
LP
1499 if (r < 0)
1500 return r;
1501
1502 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1503 } else {
1504 r = sd_event_source_set_time(s->sync_event_source, when);
1505 if (r < 0)
1506 return r;
1507
1508 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1509 }
26687bf8 1510 if (r < 0)
f9a810be 1511 return r;
26687bf8 1512
f9a810be
LP
1513 s->sync_scheduled = true;
1514 }
26687bf8
OS
1515
1516 return 0;
1517}
1518
0c24bb23
LP
1519static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1520 Server *s = userdata;
1521
1522 assert(s);
1523
1524 server_cache_hostname(s);
1525 return 0;
1526}
1527
1528static int server_open_hostname(Server *s) {
1529 int r;
1530
1531 assert(s);
1532
1533 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1534 if (s->hostname_fd < 0)
1535 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1536
151b9b96 1537 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1538 if (r < 0) {
28def94c
DR
1539 /* kernels prior to 3.2 don't support polling this file. Ignore
1540 * the failure. */
1541 if (r == -EPERM) {
e53fc357 1542 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
03e334a1 1543 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1544 return 0;
1545 }
1546
23bbb0de 1547 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1548 }
1549
1550 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1551 if (r < 0)
1552 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1553
1554 return 0;
1555}
1556
e22aa3d3
LP
1557static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1558 Server *s = userdata;
1559 int r;
1560
1561 assert(s);
1562 assert(s->notify_event_source == es);
1563 assert(s->notify_fd == fd);
1564
e22aa3d3 1565 /* The $NOTIFY_SOCKET is writable again, now send exactly one
119e9655
LP
1566 * message on it. Either it's the wtachdog event, the initial
1567 * READY=1 event or an stdout stream event. If there's nothing
1568 * to write anymore, turn our event source off. The next time
1569 * there's something to send it will be turned on again. */
e22aa3d3
LP
1570
1571 if (!s->sent_notify_ready) {
1572 static const char p[] =
1573 "READY=1\n"
1574 "STATUS=Processing requests...";
1575 ssize_t l;
1576
1577 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1578 if (l < 0) {
1579 if (errno == EAGAIN)
1580 return 0;
1581
1582 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1583 }
1584
1585 s->sent_notify_ready = true;
1586 log_debug("Sent READY=1 notification.");
1587
119e9655
LP
1588 } else if (s->send_watchdog) {
1589
1590 static const char p[] =
1591 "WATCHDOG=1";
1592
1593 ssize_t l;
1594
1595 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1596 if (l < 0) {
1597 if (errno == EAGAIN)
1598 return 0;
1599
1600 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1601 }
1602
1603 s->send_watchdog = false;
1604 log_debug("Sent WATCHDOG=1 notification.");
1605
e22aa3d3
LP
1606 } else if (s->stdout_streams_notify_queue)
1607 /* Dispatch one stream notification event */
1608 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1609
1610 /* Leave us enabled if there's still more to to do. */
119e9655 1611 if (s->send_watchdog || s->stdout_streams_notify_queue)
e22aa3d3
LP
1612 return 0;
1613
1614 /* There was nothing to do anymore, let's turn ourselves off. */
1615 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1616 if (r < 0)
1617 return log_error_errno(r, "Failed to turn off notify event source: %m");
1618
1619 return 0;
1620}
1621
119e9655
LP
1622static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1623 Server *s = userdata;
1624 int r;
1625
1626 assert(s);
1627
1628 s->send_watchdog = true;
1629
1630 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1631 if (r < 0)
1632 log_warning_errno(r, "Failed to turn on notify event source: %m");
1633
1634 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1635 if (r < 0)
1636 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1637
1638 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1639 if (r < 0)
1640 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1641
1642 return 0;
1643}
1644
e22aa3d3
LP
1645static int server_connect_notify(Server *s) {
1646 union sockaddr_union sa = {
1647 .un.sun_family = AF_UNIX,
1648 };
1649 const char *e;
1650 int r;
1651
1652 assert(s);
1653 assert(s->notify_fd < 0);
1654 assert(!s->notify_event_source);
1655
1656 /*
1657 So here's the problem: we'd like to send notification
1658 messages to PID 1, but we cannot do that via sd_notify(),
1659 since that's synchronous, and we might end up blocking on
1660 it. Specifically: given that PID 1 might block on
1661 dbus-daemon during IPC, and dbus-daemon is logging to us,
1662 and might hence block on us, we might end up in a deadlock
ccddd104 1663 if we block on sending PID 1 notification messages — by
e22aa3d3
LP
1664 generating a full blocking circle. To avoid this, let's
1665 create a non-blocking socket, and connect it to the
1666 notification socket, and then wait for POLLOUT before we
1667 send anything. This should efficiently avoid any deadlocks,
1668 as we'll never block on PID 1, hence PID 1 can safely block
1669 on dbus-daemon which can safely block on us again.
1670
1671 Don't think that this issue is real? It is, see:
1672 https://github.com/systemd/systemd/issues/1505
1673 */
1674
1675 e = getenv("NOTIFY_SOCKET");
1676 if (!e)
1677 return 0;
1678
1679 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1680 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1681 return -EINVAL;
1682 }
1683
1684 if (strlen(e) > sizeof(sa.un.sun_path)) {
1685 log_error("NOTIFY_SOCKET path too long: %s", e);
1686 return -EINVAL;
1687 }
1688
1689 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1690 if (s->notify_fd < 0)
1691 return log_error_errno(errno, "Failed to create notify socket: %m");
1692
1693 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1694
1695 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1696 if (sa.un.sun_path[0] == '@')
1697 sa.un.sun_path[0] = 0;
1698
1699 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1700 if (r < 0)
1701 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1702
1703 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1704 if (r < 0)
1705 return log_error_errno(r, "Failed to watch notification socket: %m");
1706
119e9655
LP
1707 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1708 s->send_watchdog = true;
1709
4de2402b 1710 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
119e9655
LP
1711 if (r < 0)
1712 return log_error_errno(r, "Failed to add watchdog time event: %m");
1713 }
1714
e22aa3d3
LP
1715 /* This should fire pretty soon, which we'll use to send the
1716 * READY=1 event. */
1717
1718 return 0;
1719}
1720
d025f1e4 1721int server_init(Server *s) {
13790add 1722 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4 1723 int n, r, fd;
7d18d348 1724 bool no_sockets;
d025f1e4
ZJS
1725
1726 assert(s);
1727
1728 zero(*s);
e22aa3d3 1729 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
d025f1e4
ZJS
1730 s->compress = true;
1731 s->seal = true;
1732
119e9655
LP
1733 s->watchdog_usec = USEC_INFINITY;
1734
26687bf8
OS
1735 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1736 s->sync_scheduled = false;
1737
d025f1e4
ZJS
1738 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1739 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1740
40b71e89 1741 s->forward_to_wall = true;
d025f1e4 1742
e150e820
MB
1743 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1744
d025f1e4
ZJS
1745 s->max_level_store = LOG_DEBUG;
1746 s->max_level_syslog = LOG_DEBUG;
1747 s->max_level_kmsg = LOG_NOTICE;
1748 s->max_level_console = LOG_INFO;
40b71e89 1749 s->max_level_wall = LOG_EMERG;
d025f1e4 1750
8580d1f7
LP
1751 journal_reset_metrics(&s->system_metrics);
1752 journal_reset_metrics(&s->runtime_metrics);
d025f1e4
ZJS
1753
1754 server_parse_config_file(s);
1755 server_parse_proc_cmdline(s);
8580d1f7 1756
d288f79f 1757 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1758 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1759 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1760 s->rate_limit_interval = s->rate_limit_burst = 0;
1761 }
d025f1e4 1762
8580d1f7 1763 (void) mkdir_p("/run/systemd/journal", 0755);
d025f1e4 1764
43cf8388 1765 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1766 if (!s->user_journals)
1767 return log_oom();
1768
1769 s->mmap = mmap_cache_new();
1770 if (!s->mmap)
1771 return log_oom();
1772
b58c888f
VC
1773 s->deferred_closes = set_new(NULL);
1774 if (!s->deferred_closes)
1775 return log_oom();
1776
f9a810be 1777 r = sd_event_default(&s->event);
23bbb0de
MS
1778 if (r < 0)
1779 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4
ZJS
1780
1781 n = sd_listen_fds(true);
23bbb0de
MS
1782 if (n < 0)
1783 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1784
1785 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1786
1787 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1788
1789 if (s->native_fd >= 0) {
1790 log_error("Too many native sockets passed.");
1791 return -EINVAL;
1792 }
1793
1794 s->native_fd = fd;
1795
1796 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1797
1798 if (s->stdout_fd >= 0) {
1799 log_error("Too many stdout sockets passed.");
1800 return -EINVAL;
1801 }
1802
1803 s->stdout_fd = fd;
1804
03ee5c38
LP
1805 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1806 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1807
1808 if (s->syslog_fd >= 0) {
1809 log_error("Too many /dev/log sockets passed.");
1810 return -EINVAL;
1811 }
1812
1813 s->syslog_fd = fd;
1814
875c2e22
LP
1815 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1816
1817 if (s->audit_fd >= 0) {
1818 log_error("Too many audit sockets passed.");
1819 return -EINVAL;
1820 }
1821
1822 s->audit_fd = fd;
1823
4ec3cd73 1824 } else {
4ec3cd73 1825
13790add
LP
1826 if (!fds) {
1827 fds = fdset_new();
1828 if (!fds)
1829 return log_oom();
1830 }
4ec3cd73 1831
13790add
LP
1832 r = fdset_put(fds, fd);
1833 if (r < 0)
1834 return log_oom();
4ec3cd73 1835 }
d025f1e4
ZJS
1836 }
1837
15d91bff
ZJS
1838 /* Try to restore streams, but don't bother if this fails */
1839 (void) server_restore_streams(s, fds);
d025f1e4 1840
13790add
LP
1841 if (fdset_size(fds) > 0) {
1842 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1843 fds = fdset_free(fds);
1844 }
1845
7d18d348
ZJS
1846 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1847
1848 /* always open stdout, syslog, native, and kmsg sockets */
37b7affe
ZJS
1849
1850 /* systemd-journald.socket: /run/systemd/journal/stdout */
15d91bff
ZJS
1851 r = server_open_stdout_socket(s);
1852 if (r < 0)
1853 return r;
1854
37b7affe 1855 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
13790add 1856 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1857 if (r < 0)
1858 return r;
1859
37b7affe 1860 /* systemd-journald.socket: /run/systemd/journal/socket */
13790add 1861 r = server_open_native_socket(s);
d025f1e4
ZJS
1862 if (r < 0)
1863 return r;
1864
37b7affe 1865 /* /dev/ksmg */
d025f1e4
ZJS
1866 r = server_open_dev_kmsg(s);
1867 if (r < 0)
1868 return r;
1869
7d18d348
ZJS
1870 /* Unless we got *some* sockets and not audit, open audit socket */
1871 if (s->audit_fd >= 0 || no_sockets) {
1872 r = server_open_audit(s);
1873 if (r < 0)
1874 return r;
1875 }
875c2e22 1876
d025f1e4
ZJS
1877 r = server_open_kernel_seqnum(s);
1878 if (r < 0)
1879 return r;
1880
0c24bb23
LP
1881 r = server_open_hostname(s);
1882 if (r < 0)
1883 return r;
1884
f9a810be 1885 r = setup_signals(s);
d025f1e4
ZJS
1886 if (r < 0)
1887 return r;
1888
1889 s->udev = udev_new();
1890 if (!s->udev)
1891 return -ENOMEM;
1892
f9a810be 1893 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1894 if (!s->rate_limit)
1895 return -ENOMEM;
1896
e9174f29
LP
1897 r = cg_get_root_path(&s->cgroup_root);
1898 if (r < 0)
1899 return r;
1900
0c24bb23
LP
1901 server_cache_hostname(s);
1902 server_cache_boot_id(s);
1903 server_cache_machine_id(s);
1904
e22aa3d3
LP
1905 (void) server_connect_notify(s);
1906
804ae586 1907 return system_journal_open(s, false);
d025f1e4
ZJS
1908}
1909
1910void server_maybe_append_tags(Server *s) {
1911#ifdef HAVE_GCRYPT
1912 JournalFile *f;
1913 Iterator i;
1914 usec_t n;
1915
1916 n = now(CLOCK_REALTIME);
1917
1918 if (s->system_journal)
1919 journal_file_maybe_append_tag(s->system_journal, n);
1920
43cf8388 1921 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1922 journal_file_maybe_append_tag(f, n);
1923#endif
1924}
1925
1926void server_done(Server *s) {
1927 JournalFile *f;
1928 assert(s);
1929
b58c888f
VC
1930 if (s->deferred_closes) {
1931 journal_file_close_set(s->deferred_closes);
1932 set_free(s->deferred_closes);
1933 }
1934
d025f1e4
ZJS
1935 while (s->stdout_streams)
1936 stdout_stream_free(s->stdout_streams);
1937
1938 if (s->system_journal)
69a3a6fd 1939 (void) journal_file_close(s->system_journal);
d025f1e4
ZJS
1940
1941 if (s->runtime_journal)
69a3a6fd 1942 (void) journal_file_close(s->runtime_journal);
d025f1e4 1943
43cf8388 1944 while ((f = ordered_hashmap_steal_first(s->user_journals)))
69a3a6fd 1945 (void) journal_file_close(f);
d025f1e4 1946
43cf8388 1947 ordered_hashmap_free(s->user_journals);
d025f1e4 1948
f9a810be
LP
1949 sd_event_source_unref(s->syslog_event_source);
1950 sd_event_source_unref(s->native_event_source);
1951 sd_event_source_unref(s->stdout_event_source);
1952 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1953 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1954 sd_event_source_unref(s->sync_event_source);
1955 sd_event_source_unref(s->sigusr1_event_source);
1956 sd_event_source_unref(s->sigusr2_event_source);
1957 sd_event_source_unref(s->sigterm_event_source);
1958 sd_event_source_unref(s->sigint_event_source);
94b65516 1959 sd_event_source_unref(s->sigrtmin1_event_source);
0c24bb23 1960 sd_event_source_unref(s->hostname_event_source);
e22aa3d3 1961 sd_event_source_unref(s->notify_event_source);
119e9655 1962 sd_event_source_unref(s->watchdog_event_source);
f9a810be 1963 sd_event_unref(s->event);
d025f1e4 1964
03e334a1
LP
1965 safe_close(s->syslog_fd);
1966 safe_close(s->native_fd);
1967 safe_close(s->stdout_fd);
1968 safe_close(s->dev_kmsg_fd);
875c2e22 1969 safe_close(s->audit_fd);
03e334a1 1970 safe_close(s->hostname_fd);
e22aa3d3 1971 safe_close(s->notify_fd);
0c24bb23 1972
d025f1e4
ZJS
1973 if (s->rate_limit)
1974 journal_rate_limit_free(s->rate_limit);
1975
1976 if (s->kernel_seqnum)
1977 munmap(s->kernel_seqnum, sizeof(uint64_t));
1978
1979 free(s->buffer);
1980 free(s->tty_path);
e9174f29 1981 free(s->cgroup_root);
99d0966e 1982 free(s->hostname_field);
d025f1e4
ZJS
1983
1984 if (s->mmap)
1985 mmap_cache_unref(s->mmap);
1986
3e044c49 1987 udev_unref(s->udev);
d025f1e4 1988}
8580d1f7
LP
1989
1990static const char* const storage_table[_STORAGE_MAX] = {
1991 [STORAGE_AUTO] = "auto",
1992 [STORAGE_VOLATILE] = "volatile",
1993 [STORAGE_PERSISTENT] = "persistent",
1994 [STORAGE_NONE] = "none"
1995};
1996
1997DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1998DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1999
2000static const char* const split_mode_table[_SPLIT_MAX] = {
2001 [SPLIT_LOGIN] = "login",
2002 [SPLIT_UID] = "uid",
2003 [SPLIT_NONE] = "none",
2004};
2005
2006DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2007DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");