]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journald: prefix exported calls with "server_", unexport unnecessary calls
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/signalfd.h>
23#include <sys/ioctl.h>
24#include <linux/sockios.h>
25#include <sys/statvfs.h>
26#include <sys/mman.h>
26687bf8 27#include <sys/timerfd.h>
d025f1e4
ZJS
28
29#include <libudev.h>
d025f1e4 30
74df0fca
LP
31#include "sd-journal.h"
32#include "sd-messages.h"
33#include "sd-daemon.h"
a5c32cff 34#include "fileio.h"
d025f1e4
ZJS
35#include "mkdir.h"
36#include "hashmap.h"
37#include "journal-file.h"
38#include "socket-util.h"
39#include "cgroup-util.h"
40#include "list.h"
d025f1e4
ZJS
41#include "missing.h"
42#include "conf-parser.h"
74df0fca 43#include "selinux-util.h"
d025f1e4
ZJS
44#include "journal-internal.h"
45#include "journal-vacuum.h"
46#include "journal-authenticate.h"
d025f1e4
ZJS
47#include "journald-rate-limit.h"
48#include "journald-kmsg.h"
49#include "journald-syslog.h"
50#include "journald-stream.h"
51#include "journald-console.h"
52#include "journald-native.h"
875c2e22 53#include "journald-audit.h"
74df0fca 54#include "journald-server.h"
d025f1e4
ZJS
55
56#ifdef HAVE_ACL
57#include <sys/acl.h>
58#include <acl/libacl.h>
59#include "acl-util.h"
60#endif
61
62#ifdef HAVE_SELINUX
63#include <selinux/selinux.h>
64#endif
65
66#define USER_JOURNALS_MAX 1024
67
26687bf8 68#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
69#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
70#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 71#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4
ZJS
72
73#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
74
2c5859af 75static const char* const storage_table[_STORAGE_MAX] = {
d025f1e4
ZJS
76 [STORAGE_AUTO] = "auto",
77 [STORAGE_VOLATILE] = "volatile",
78 [STORAGE_PERSISTENT] = "persistent",
79 [STORAGE_NONE] = "none"
80};
81
82DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
83DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
84
2c5859af
DM
85static const char* const split_mode_table[_SPLIT_MAX] = {
86 [SPLIT_LOGIN] = "login",
d025f1e4 87 [SPLIT_UID] = "uid",
2c5859af 88 [SPLIT_NONE] = "none",
d025f1e4
ZJS
89};
90
91DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
92DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
93
670b110c 94static uint64_t available_space(Server *s, bool verbose) {
db91ea32 95 char ids[33];
7fd1b19b 96 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
97 sd_id128_t machine;
98 struct statvfs ss;
670b110c 99 uint64_t sum = 0, ss_avail = 0, avail = 0;
d025f1e4 100 int r;
7fd1b19b 101 _cleanup_closedir_ DIR *d = NULL;
d025f1e4 102 usec_t ts;
670b110c 103 const char *f;
d025f1e4
ZJS
104 JournalMetrics *m;
105
106 ts = now(CLOCK_MONOTONIC);
107
670b110c
ZJS
108 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
109 && !verbose)
d025f1e4
ZJS
110 return s->cached_available_space;
111
112 r = sd_id128_get_machine(&machine);
113 if (r < 0)
114 return 0;
115
116 if (s->system_journal) {
117 f = "/var/log/journal/";
118 m = &s->system_metrics;
119 } else {
120 f = "/run/log/journal/";
121 m = &s->runtime_metrics;
122 }
123
124 assert(m);
125
126 p = strappend(f, sd_id128_to_string(machine, ids));
127 if (!p)
128 return 0;
129
130 d = opendir(p);
d025f1e4
ZJS
131 if (!d)
132 return 0;
133
134 if (fstatvfs(dirfd(d), &ss) < 0)
db91ea32 135 return 0;
d025f1e4
ZJS
136
137 for (;;) {
138 struct stat st;
139 struct dirent *de;
d025f1e4 140
0371ca0d
FW
141 errno = 0;
142 de = readdir(d);
143 if (!de && errno != 0)
144 return 0;
d025f1e4
ZJS
145
146 if (!de)
147 break;
148
149 if (!endswith(de->d_name, ".journal") &&
150 !endswith(de->d_name, ".journal~"))
151 continue;
152
153 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
154 continue;
155
156 if (!S_ISREG(st.st_mode))
157 continue;
158
159 sum += (uint64_t) st.st_blocks * 512UL;
160 }
161
d025f1e4
ZJS
162 ss_avail = ss.f_bsize * ss.f_bavail;
163
348ced90
ZJS
164 /* If we reached a high mark, we will always allow this much
165 * again, unless usage goes above max_use. This watermark
166 * value is cached so that we don't give up space on pressure,
167 * but hover below the maximum usage. */
168
169 if (m->use < sum)
170 m->use = sum;
171
172 avail = LESS_BY(ss_avail, m->keep_free);
173
174 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
d025f1e4
ZJS
175 s->cached_available_space_timestamp = ts;
176
670b110c
ZJS
177 if (verbose) {
178 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
179 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
180
181 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
348ced90
ZJS
182 "%s journal is using %s (max allowed %s, "
183 "trying to leave %s free of %s available → current limit %s).",
670b110c
ZJS
184 s->system_journal ? "Permanent" : "Runtime",
185 format_bytes(fb1, sizeof(fb1), sum),
186 format_bytes(fb2, sizeof(fb2), m->max_use),
187 format_bytes(fb3, sizeof(fb3), m->keep_free),
188 format_bytes(fb4, sizeof(fb4), ss_avail),
348ced90 189 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
670b110c
ZJS
190 }
191
192 return s->cached_available_space;
d025f1e4
ZJS
193}
194
d025f1e4
ZJS
195void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
196 int r;
197#ifdef HAVE_ACL
198 acl_t acl;
199 acl_entry_t entry;
200 acl_permset_t permset;
201#endif
202
203 assert(f);
204
4608af43 205 r = fchmod(f->fd, 0640);
d025f1e4 206 if (r < 0)
da927ba9 207 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
d025f1e4
ZJS
208
209#ifdef HAVE_ACL
34c10968 210 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
211 return;
212
213 acl = acl_get_fd(f->fd);
214 if (!acl) {
56f64d95 215 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
216 return;
217 }
218
219 r = acl_find_uid(acl, uid, &entry);
220 if (r <= 0) {
221
222 if (acl_create_entry(&acl, &entry) < 0 ||
223 acl_set_tag_type(entry, ACL_USER) < 0 ||
224 acl_set_qualifier(entry, &uid) < 0) {
56f64d95 225 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
226 goto finish;
227 }
228 }
229
23ad4dd8
JAS
230 /* We do not recalculate the mask unconditionally here,
231 * so that the fchmod() mask above stays intact. */
d025f1e4 232 if (acl_get_permset(entry, &permset) < 0 ||
23ad4dd8
JAS
233 acl_add_perm(permset, ACL_READ) < 0 ||
234 calc_acl_mask_if_needed(&acl) < 0) {
56f64d95 235 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
236 goto finish;
237 }
238
239 if (acl_set_fd(f->fd, acl) < 0)
56f64d95 240 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
241
242finish:
243 acl_free(acl);
244#endif
245}
246
247static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 248 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
249 int r;
250 JournalFile *f;
251 sd_id128_t machine;
252
253 assert(s);
254
255 /* We split up user logs only on /var, not on /run. If the
256 * runtime file is open, we write to it exclusively, in order
257 * to guarantee proper order as soon as we flush /run to
258 * /var and close the runtime file. */
259
260 if (s->runtime_journal)
261 return s->runtime_journal;
262
f7dc3ab9 263 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
264 return s->system_journal;
265
266 r = sd_id128_get_machine(&machine);
267 if (r < 0)
268 return s->system_journal;
269
43cf8388 270 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
d025f1e4
ZJS
271 if (f)
272 return f;
273
de0671ee
ZJS
274 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
275 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
276 return s->system_journal;
277
43cf8388 278 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 279 /* Too many open? Then let's close one */
43cf8388 280 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4
ZJS
281 assert(f);
282 journal_file_close(f);
283 }
284
cbd67177 285 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
d025f1e4
ZJS
286 if (r < 0)
287 return s->system_journal;
288
289 server_fix_perms(s, f, uid);
290
43cf8388 291 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
d025f1e4
ZJS
292 if (r < 0) {
293 journal_file_close(f);
294 return s->system_journal;
295 }
296
297 return f;
298}
299
fc55baee
ZJS
300static int do_rotate(Server *s, JournalFile **f, const char* name,
301 bool seal, uint32_t uid) {
302 int r;
303 assert(s);
304
305 if (!*f)
306 return -EINVAL;
307
308 r = journal_file_rotate(f, s->compress, seal);
309 if (r < 0)
310 if (*f)
c33b3297
MS
311 log_error_errno(r, "Failed to rotate %s: %m",
312 (*f)->path);
fc55baee 313 else
c33b3297
MS
314 log_error_errno(r, "Failed to create new %s journal: %m",
315 name);
fc55baee
ZJS
316 else
317 server_fix_perms(s, *f, uid);
318 return r;
319}
320
d025f1e4
ZJS
321void server_rotate(Server *s) {
322 JournalFile *f;
323 void *k;
324 Iterator i;
325 int r;
326
327 log_debug("Rotating...");
328
fc55baee
ZJS
329 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330 do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 331
43cf8388 332 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
fc55baee
ZJS
333 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334 if (r >= 0)
43cf8388 335 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
336 else if (!f)
337 /* Old file has been closed and deallocated */
43cf8388 338 ordered_hashmap_remove(s->user_journals, k);
d025f1e4
ZJS
339 }
340}
341
26687bf8
OS
342void server_sync(Server *s) {
343 JournalFile *f;
344 void *k;
345 Iterator i;
346 int r;
347
26687bf8
OS
348 if (s->system_journal) {
349 r = journal_file_set_offline(s->system_journal);
350 if (r < 0)
da927ba9 351 log_error_errno(r, "Failed to sync system journal: %m");
26687bf8
OS
352 }
353
43cf8388 354 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
26687bf8
OS
355 r = journal_file_set_offline(f);
356 if (r < 0)
da927ba9 357 log_error_errno(r, "Failed to sync user journal: %m");
26687bf8
OS
358 }
359
f9a810be
LP
360 if (s->sync_event_source) {
361 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362 if (r < 0)
da927ba9 363 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 364 }
26687bf8
OS
365
366 s->sync_scheduled = false;
367}
368
63c8666b
ZJS
369static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
370 JournalMetrics *metrics) {
371 char *p;
372 int r;
373
374 if (!f)
375 return;
376
377 p = strappenda(path, ids);
dbd2a83f 378 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
63c8666b 379 if (r < 0 && r != -ENOENT)
da927ba9 380 log_error_errno(r, "Failed to vacuum %s: %m", p);
63c8666b
ZJS
381}
382
d025f1e4 383void server_vacuum(Server *s) {
d025f1e4
ZJS
384 char ids[33];
385 sd_id128_t machine;
386 int r;
387
388 log_debug("Vacuuming...");
389
390 s->oldest_file_usec = 0;
391
392 r = sd_id128_get_machine(&machine);
393 if (r < 0) {
da927ba9 394 log_error_errno(r, "Failed to get machine ID: %m");
d025f1e4
ZJS
395 return;
396 }
d025f1e4
ZJS
397 sd_id128_to_string(machine, ids);
398
63c8666b
ZJS
399 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
400 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
d025f1e4
ZJS
401
402 s->cached_available_space_timestamp = 0;
403}
404
0c24bb23
LP
405static void server_cache_machine_id(Server *s) {
406 sd_id128_t id;
407 int r;
408
409 assert(s);
410
411 r = sd_id128_get_machine(&id);
412 if (r < 0)
413 return;
414
415 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
416}
417
418static void server_cache_boot_id(Server *s) {
419 sd_id128_t id;
420 int r;
421
422 assert(s);
423
424 r = sd_id128_get_boot(&id);
425 if (r < 0)
426 return;
427
428 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
429}
430
431static void server_cache_hostname(Server *s) {
432 _cleanup_free_ char *t = NULL;
433 char *x;
434
435 assert(s);
436
437 t = gethostname_malloc();
438 if (!t)
439 return;
440
441 x = strappend("_HOSTNAME=", t);
442 if (!x)
443 return;
444
445 free(s->hostname_field);
446 s->hostname_field = x;
447}
448
8531ae70 449static bool shall_try_append_again(JournalFile *f, int r) {
d025f1e4
ZJS
450
451 /* -E2BIG Hit configured limit
452 -EFBIG Hit fs limit
453 -EDQUOT Quota limit hit
454 -ENOSPC Disk full
fa6ac760 455 -EIO I/O error of some kind (mmap)
d025f1e4
ZJS
456 -EHOSTDOWN Other machine
457 -EBUSY Unclean shutdown
458 -EPROTONOSUPPORT Unsupported feature
459 -EBADMSG Corrupted
460 -ENODATA Truncated
461 -ESHUTDOWN Already archived */
462
463 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
464 log_debug("%s: Allocation limit reached, rotating.", f->path);
465 else if (r == -EHOSTDOWN)
466 log_info("%s: Journal file from other machine, rotating.", f->path);
467 else if (r == -EBUSY)
468 log_info("%s: Unclean shutdown, rotating.", f->path);
469 else if (r == -EPROTONOSUPPORT)
470 log_info("%s: Unsupported feature, rotating.", f->path);
471 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
472 log_warning("%s: Journal file corrupted, rotating.", f->path);
fa6ac760
LP
473 else if (r == -EIO)
474 log_warning("%s: IO error, rotating.", f->path);
d025f1e4
ZJS
475 else
476 return false;
477
478 return true;
479}
480
d07f7b9e 481static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
482 JournalFile *f;
483 bool vacuumed = false;
484 int r;
485
486 assert(s);
487 assert(iovec);
488 assert(n > 0);
489
490 f = find_journal(s, uid);
491 if (!f)
492 return;
493
494 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
495 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
496 server_rotate(s);
497 server_vacuum(s);
498 vacuumed = true;
499
500 f = find_journal(s, uid);
501 if (!f)
502 return;
503 }
504
505 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 506 if (r >= 0) {
d07f7b9e 507 server_schedule_sync(s, priority);
d025f1e4 508 return;
26687bf8 509 }
d025f1e4
ZJS
510
511 if (vacuumed || !shall_try_append_again(f, r)) {
e40ec7ae
ZJS
512 size_t size = 0;
513 unsigned i;
514 for (i = 0; i < n; i++)
515 size += iovec[i].iov_len;
516
da927ba9 517 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, size);
d025f1e4
ZJS
518 return;
519 }
520
521 server_rotate(s);
522 server_vacuum(s);
523
524 f = find_journal(s, uid);
525 if (!f)
526 return;
527
528 log_debug("Retrying write.");
529 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
e40ec7ae
ZJS
530 if (r < 0) {
531 size_t size = 0;
532 unsigned i;
533 for (i = 0; i < n; i++)
534 size += iovec[i].iov_len;
535
da927ba9 536 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, size);
d07f7b9e
LP
537 } else
538 server_schedule_sync(s, priority);
d025f1e4
ZJS
539}
540
541static void dispatch_message_real(
542 Server *s,
543 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
544 const struct ucred *ucred,
545 const struct timeval *tv,
d025f1e4 546 const char *label, size_t label_len,
968f3196 547 const char *unit_id,
d07f7b9e 548 int priority,
968f3196 549 pid_t object_pid) {
d025f1e4 550
968f3196 551 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
552 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
553 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
554 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 555 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
556 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
557 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
558 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
559 uid_t object_uid;
560 gid_t object_gid;
968f3196 561 char *x;
d025f1e4 562 int r;
ae018d9b 563 char *t, *c;
82499507
LP
564 uid_t realuid = 0, owner = 0, journal_uid;
565 bool owner_valid = false;
ae018d9b 566#ifdef HAVE_AUDIT
968f3196
ZJS
567 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
568 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
569 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
570 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
571
572 uint32_t audit;
573 uid_t loginuid;
574#endif
d025f1e4
ZJS
575
576 assert(s);
577 assert(iovec);
578 assert(n > 0);
968f3196 579 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
580
581 if (ucred) {
d025f1e4
ZJS
582 realuid = ucred->uid;
583
de0671ee 584 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 585 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 586
de0671ee 587 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 588 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 589
de0671ee 590 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 591 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
592
593 r = get_process_comm(ucred->pid, &t);
594 if (r >= 0) {
968f3196 595 x = strappenda("_COMM=", t);
d025f1e4 596 free(t);
968f3196 597 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
598 }
599
600 r = get_process_exe(ucred->pid, &t);
601 if (r >= 0) {
968f3196 602 x = strappenda("_EXE=", t);
d025f1e4 603 free(t);
968f3196 604 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
605 }
606
9bdbc2e2 607 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 608 if (r >= 0) {
968f3196 609 x = strappenda("_CMDLINE=", t);
d025f1e4 610 free(t);
3a832116
SL
611 IOVEC_SET_STRING(iovec[n++], x);
612 }
613
614 r = get_process_capeff(ucred->pid, &t);
615 if (r >= 0) {
616 x = strappenda("_CAP_EFFECTIVE=", t);
617 free(t);
968f3196 618 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
619 }
620
0a20e3c1 621#ifdef HAVE_AUDIT
d025f1e4 622 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 623 if (r >= 0) {
de0671ee 624 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
625 IOVEC_SET_STRING(iovec[n++], audit_session);
626 }
d025f1e4
ZJS
627
628 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 629 if (r >= 0) {
de0671ee 630 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 631 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 632 }
ae018d9b 633#endif
d025f1e4 634
e9174f29 635 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 636 if (r >= 0) {
968f3196
ZJS
637 char *session = NULL;
638
639 x = strappenda("_SYSTEMD_CGROUP=", c);
640 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 641
ae018d9b
LP
642 r = cg_path_get_session(c, &t);
643 if (r >= 0) {
644 session = strappenda("_SYSTEMD_SESSION=", t);
645 free(t);
d025f1e4 646 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
647 }
648
649 if (cg_path_get_owner_uid(c, &owner) >= 0) {
650 owner_valid = true;
d025f1e4 651
de0671ee 652 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 653 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 654 }
d025f1e4 655
ae018d9b 656 if (cg_path_get_unit(c, &t) >= 0) {
968f3196 657 x = strappenda("_SYSTEMD_UNIT=", t);
ae018d9b 658 free(t);
19cace37
LP
659 IOVEC_SET_STRING(iovec[n++], x);
660 } else if (unit_id && !session) {
661 x = strappenda("_SYSTEMD_UNIT=", unit_id);
662 IOVEC_SET_STRING(iovec[n++], x);
663 }
664
665 if (cg_path_get_user_unit(c, &t) >= 0) {
968f3196 666 x = strappenda("_SYSTEMD_USER_UNIT=", t);
ae018d9b 667 free(t);
968f3196 668 IOVEC_SET_STRING(iovec[n++], x);
19cace37
LP
669 } else if (unit_id && session) {
670 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
671 IOVEC_SET_STRING(iovec[n++], x);
672 }
ae018d9b 673
0a244b8e
LP
674 if (cg_path_get_slice(c, &t) >= 0) {
675 x = strappenda("_SYSTEMD_SLICE=", t);
676 free(t);
677 IOVEC_SET_STRING(iovec[n++], x);
678 }
679
ae018d9b 680 free(c);
2d43b190
DM
681 } else if (unit_id) {
682 x = strappenda("_SYSTEMD_UNIT=", unit_id);
683 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 684 }
d025f1e4 685
d025f1e4 686#ifdef HAVE_SELINUX
6baa7db0 687 if (mac_selinux_use()) {
d682b3a7 688 if (label) {
f8294e41 689 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 690
d682b3a7
LP
691 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
692 IOVEC_SET_STRING(iovec[n++], x);
693 } else {
694 security_context_t con;
d025f1e4 695
d682b3a7
LP
696 if (getpidcon(ucred->pid, &con) >= 0) {
697 x = strappenda("_SELINUX_CONTEXT=", con);
e7ff4e7f 698
d682b3a7
LP
699 freecon(con);
700 IOVEC_SET_STRING(iovec[n++], x);
701 }
d025f1e4
ZJS
702 }
703 }
704#endif
705 }
968f3196
ZJS
706 assert(n <= m);
707
708 if (object_pid) {
709 r = get_process_uid(object_pid, &object_uid);
710 if (r >= 0) {
de0671ee 711 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
712 IOVEC_SET_STRING(iovec[n++], o_uid);
713 }
714
715 r = get_process_gid(object_pid, &object_gid);
716 if (r >= 0) {
de0671ee 717 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
718 IOVEC_SET_STRING(iovec[n++], o_gid);
719 }
720
721 r = get_process_comm(object_pid, &t);
722 if (r >= 0) {
723 x = strappenda("OBJECT_COMM=", t);
724 free(t);
725 IOVEC_SET_STRING(iovec[n++], x);
726 }
727
728 r = get_process_exe(object_pid, &t);
729 if (r >= 0) {
730 x = strappenda("OBJECT_EXE=", t);
731 free(t);
732 IOVEC_SET_STRING(iovec[n++], x);
733 }
734
735 r = get_process_cmdline(object_pid, 0, false, &t);
736 if (r >= 0) {
737 x = strappenda("OBJECT_CMDLINE=", t);
738 free(t);
739 IOVEC_SET_STRING(iovec[n++], x);
740 }
741
742#ifdef HAVE_AUDIT
743 r = audit_session_from_pid(object_pid, &audit);
744 if (r >= 0) {
de0671ee 745 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
746 IOVEC_SET_STRING(iovec[n++], o_audit_session);
747 }
748
749 r = audit_loginuid_from_pid(object_pid, &loginuid);
750 if (r >= 0) {
de0671ee 751 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
752 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
753 }
754#endif
755
e9174f29 756 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196
ZJS
757 if (r >= 0) {
758 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
759 IOVEC_SET_STRING(iovec[n++], x);
760
761 r = cg_path_get_session(c, &t);
762 if (r >= 0) {
763 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
764 free(t);
765 IOVEC_SET_STRING(iovec[n++], x);
766 }
767
768 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 769 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
770 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
771 }
772
773 if (cg_path_get_unit(c, &t) >= 0) {
774 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
775 free(t);
19cace37
LP
776 IOVEC_SET_STRING(iovec[n++], x);
777 }
778
779 if (cg_path_get_user_unit(c, &t) >= 0) {
968f3196
ZJS
780 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
781 free(t);
968f3196 782 IOVEC_SET_STRING(iovec[n++], x);
19cace37 783 }
968f3196
ZJS
784
785 free(c);
786 }
787 }
788 assert(n <= m);
d025f1e4
ZJS
789
790 if (tv) {
ae018d9b 791 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 792 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
793 }
794
795 /* Note that strictly speaking storing the boot id here is
796 * redundant since the entry includes this in-line
797 * anyway. However, we need this indexed, too. */
0c24bb23
LP
798 if (!isempty(s->boot_id_field))
799 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 800
0c24bb23
LP
801 if (!isempty(s->machine_id_field))
802 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 803
0c24bb23
LP
804 if (!isempty(s->hostname_field))
805 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
806
807 assert(n <= m);
808
da499392 809 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 810 /* Split up strictly by any UID */
759c945a 811 journal_uid = realuid;
82499507 812 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
813 /* Split up by login UIDs. We do this only if the
814 * realuid is not root, in order not to accidentally
815 * leak privileged information to the user that is
816 * logged by a privileged process that is part of an
7517e174 817 * unprivileged session. */
8a0889df 818 journal_uid = owner;
da499392
KS
819 else
820 journal_uid = 0;
759c945a 821
d07f7b9e 822 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
823}
824
825void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
826 char mid[11 + 32 + 1];
827 char buffer[16 + LINE_MAX + 1];
828 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
829 int n = 0;
830 va_list ap;
b92bea5d 831 struct ucred ucred = {};
d025f1e4
ZJS
832
833 assert(s);
834 assert(format);
835
836 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
837 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
838
839 memcpy(buffer, "MESSAGE=", 8);
840 va_start(ap, format);
841 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
842 va_end(ap);
843 char_array_0(buffer);
844 IOVEC_SET_STRING(iovec[n++], buffer);
845
846 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
e2cc6eca 847 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
848 char_array_0(mid);
849 IOVEC_SET_STRING(iovec[n++], mid);
850 }
851
d025f1e4
ZJS
852 ucred.pid = getpid();
853 ucred.uid = getuid();
854 ucred.gid = getgid();
855
d07f7b9e 856 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
d025f1e4
ZJS
857}
858
859void server_dispatch_message(
860 Server *s,
861 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
862 const struct ucred *ucred,
863 const struct timeval *tv,
d025f1e4
ZJS
864 const char *label, size_t label_len,
865 const char *unit_id,
968f3196
ZJS
866 int priority,
867 pid_t object_pid) {
d025f1e4 868
7027ff61 869 int rl, r;
7fd1b19b 870 _cleanup_free_ char *path = NULL;
db91ea32 871 char *c;
d025f1e4
ZJS
872
873 assert(s);
874 assert(iovec || n == 0);
875
876 if (n == 0)
877 return;
878
879 if (LOG_PRI(priority) > s->max_level_store)
880 return;
881
2f5df74a
HHPF
882 /* Stop early in case the information will not be stored
883 * in a journal. */
884 if (s->storage == STORAGE_NONE)
885 return;
886
d025f1e4
ZJS
887 if (!ucred)
888 goto finish;
889
e9174f29 890 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 891 if (r < 0)
d025f1e4
ZJS
892 goto finish;
893
894 /* example: /user/lennart/3/foobar
895 * /system/dbus.service/foobar
896 *
897 * So let's cut of everything past the third /, since that is
898 * where user directories start */
899
900 c = strchr(path, '/');
901 if (c) {
902 c = strchr(c+1, '/');
903 if (c) {
904 c = strchr(c+1, '/');
905 if (c)
906 *c = 0;
907 }
908 }
909
db91ea32 910 rl = journal_rate_limit_test(s->rate_limit, path,
670b110c 911 priority & LOG_PRIMASK, available_space(s, false));
d025f1e4 912
db91ea32 913 if (rl == 0)
d025f1e4 914 return;
d025f1e4
ZJS
915
916 /* Write a suppression message if we suppressed something */
917 if (rl > 1)
db91ea32
ZJS
918 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
919 "Suppressed %u messages from %s", rl - 1, path);
d025f1e4
ZJS
920
921finish:
d07f7b9e 922 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
923}
924
925
caa2f4c0 926static int system_journal_open(Server *s, bool flush_requested) {
d025f1e4
ZJS
927 int r;
928 char *fn;
929 sd_id128_t machine;
930 char ids[33];
931
932 r = sd_id128_get_machine(&machine);
23bbb0de
MS
933 if (r < 0)
934 return log_error_errno(r, "Failed to get machine id: %m");
d025f1e4
ZJS
935
936 sd_id128_to_string(machine, ids);
937
938 if (!s->system_journal &&
939 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
caa2f4c0
ZJS
940 (flush_requested
941 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
d025f1e4
ZJS
942
943 /* If in auto mode: first try to create the machine
944 * path, but not the prefix.
945 *
946 * If in persistent mode: create /var/log/journal and
947 * the machine path */
948
949 if (s->storage == STORAGE_PERSISTENT)
950 (void) mkdir("/var/log/journal/", 0755);
951
e40ec7ae 952 fn = strappenda("/var/log/journal/", ids);
d025f1e4 953 (void) mkdir(fn, 0755);
d025f1e4 954
e40ec7ae 955 fn = strappenda(fn, "/system.journal");
d025f1e4 956 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
d025f1e4 957
670b110c 958 if (r >= 0)
d025f1e4 959 server_fix_perms(s, s->system_journal, 0);
433dd100
LN
960 else if (r < 0) {
961 if (r != -ENOENT && r != -EROFS)
da927ba9 962 log_warning_errno(r, "Failed to open system journal: %m");
e40ec7ae 963
433dd100
LN
964 r = 0;
965 }
d025f1e4
ZJS
966 }
967
968 if (!s->runtime_journal &&
969 (s->storage != STORAGE_NONE)) {
970
971 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
972 if (!fn)
973 return -ENOMEM;
974
975 if (s->system_journal) {
976
977 /* Try to open the runtime journal, but only
978 * if it already exists, so that we can flush
979 * it into the system journal */
980
981 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
982 free(fn);
983
984 if (r < 0) {
985 if (r != -ENOENT)
da927ba9 986 log_warning_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
987
988 r = 0;
989 }
990
991 } else {
992
993 /* OK, we really need the runtime journal, so create
994 * it if necessary. */
995
fc1d70af
LP
996 (void) mkdir("/run/log", 0755);
997 (void) mkdir("/run/log/journal", 0755);
998 (void) mkdir_parents(fn, 0750);
999
d025f1e4
ZJS
1000 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
1001 free(fn);
1002
23bbb0de
MS
1003 if (r < 0)
1004 return log_error_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1005 }
1006
670b110c 1007 if (s->runtime_journal)
d025f1e4 1008 server_fix_perms(s, s->runtime_journal, 0);
d025f1e4
ZJS
1009 }
1010
670b110c
ZJS
1011 available_space(s, true);
1012
d025f1e4
ZJS
1013 return r;
1014}
1015
1016int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1017 sd_id128_t machine;
1018 sd_journal *j = NULL;
fbb63411
LP
1019 char ts[FORMAT_TIMESPAN_MAX];
1020 usec_t start;
1021 unsigned n = 0;
1022 int r;
d025f1e4
ZJS
1023
1024 assert(s);
1025
1026 if (s->storage != STORAGE_AUTO &&
1027 s->storage != STORAGE_PERSISTENT)
1028 return 0;
1029
1030 if (!s->runtime_journal)
1031 return 0;
1032
caa2f4c0 1033 system_journal_open(s, true);
d025f1e4
ZJS
1034
1035 if (!s->system_journal)
1036 return 0;
1037
1038 log_debug("Flushing to /var...");
1039
fbb63411
LP
1040 start = now(CLOCK_MONOTONIC);
1041
d025f1e4 1042 r = sd_id128_get_machine(&machine);
00a16861 1043 if (r < 0)
d025f1e4 1044 return r;
d025f1e4
ZJS
1045
1046 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1047 if (r < 0)
1048 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1049
93b73b06
LP
1050 sd_journal_set_data_threshold(j, 0);
1051
d025f1e4
ZJS
1052 SD_JOURNAL_FOREACH(j) {
1053 Object *o = NULL;
1054 JournalFile *f;
1055
1056 f = j->current_file;
1057 assert(f && f->current_offset > 0);
1058
fbb63411
LP
1059 n++;
1060
d025f1e4
ZJS
1061 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1062 if (r < 0) {
da927ba9 1063 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1064 goto finish;
1065 }
1066
1067 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1068 if (r >= 0)
1069 continue;
1070
1071 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1072 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1073 goto finish;
1074 }
1075
1076 server_rotate(s);
1077 server_vacuum(s);
1078
253f59df
LP
1079 if (!s->system_journal) {
1080 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1081 r = -EIO;
1082 goto finish;
1083 }
1084
d025f1e4
ZJS
1085 log_debug("Retrying write.");
1086 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1087 if (r < 0) {
da927ba9 1088 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1089 goto finish;
1090 }
1091 }
1092
1093finish:
1094 journal_file_post_change(s->system_journal);
1095
1096 journal_file_close(s->runtime_journal);
1097 s->runtime_journal = NULL;
1098
1099 if (r >= 0)
1100 rm_rf("/run/log/journal", false, true, false);
1101
763c7aa2 1102 sd_journal_close(j);
d025f1e4 1103
fbb63411
LP
1104 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1105
d025f1e4
ZJS
1106 return r;
1107}
1108
8531ae70 1109int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be
LP
1110 Server *s = userdata;
1111
d025f1e4 1112 assert(s);
875c2e22 1113 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1114
1115 if (revents != EPOLLIN) {
1116 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1117 return -EIO;
1118 }
1119
1120 for (;;) {
1121 struct ucred *ucred = NULL;
1122 struct timeval *tv = NULL;
1123 struct cmsghdr *cmsg;
1124 char *label = NULL;
1125 size_t label_len = 0;
1126 struct iovec iovec;
1127
1128 union {
1129 struct cmsghdr cmsghdr;
1130
bdd13f6b
ZJS
1131 /* We use NAME_MAX space for the SELinux label
1132 * here. The kernel currently enforces no
1133 * limit, but according to suggestions from
1134 * the SELinux people this will change and it
1135 * will probably be identical to NAME_MAX. For
1136 * now we use that, but this should be updated
7517e174 1137 * one day when the final limit is known. */
f9a810be
LP
1138 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1139 CMSG_SPACE(sizeof(struct timeval)) +
1140 CMSG_SPACE(sizeof(int)) + /* fd */
1141 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1142 } control = {};
875c2e22 1143 union sockaddr_union sa = {};
f9a810be
LP
1144 struct msghdr msghdr = {
1145 .msg_iov = &iovec,
1146 .msg_iovlen = 1,
1147 .msg_control = &control,
1148 .msg_controllen = sizeof(control),
875c2e22
LP
1149 .msg_name = &sa,
1150 .msg_namelen = sizeof(sa),
f9a810be 1151 };
d025f1e4 1152
d025f1e4 1153 ssize_t n;
f9a810be
LP
1154 int *fds = NULL;
1155 unsigned n_fds = 0;
875c2e22
LP
1156 int v = 0;
1157 size_t m;
d025f1e4 1158
875c2e22
LP
1159 /* Try to get the right size, if we can. (Not all
1160 * sockets support SIOCINQ, hence we just try, but
1161 * don't rely on it. */
1162 (void) ioctl(fd, SIOCINQ, &v);
1163
7517e174 1164 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
875c2e22
LP
1165 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1166 (size_t) LINE_MAX,
1167 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1168
875c2e22 1169 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
f9a810be 1170 return log_oom();
d025f1e4 1171
f9a810be 1172 iovec.iov_base = s->buffer;
875c2e22 1173 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1174
f9a810be
LP
1175 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1176 if (n < 0) {
d025f1e4 1177 if (errno == EINTR || errno == EAGAIN)
f9a810be 1178 return 0;
d025f1e4 1179
56f64d95 1180 log_error_errno(errno, "recvmsg() failed: %m");
d025f1e4
ZJS
1181 return -errno;
1182 }
1183
f9a810be
LP
1184 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1185
1186 if (cmsg->cmsg_level == SOL_SOCKET &&
1187 cmsg->cmsg_type == SCM_CREDENTIALS &&
1188 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1189 ucred = (struct ucred*) CMSG_DATA(cmsg);
1190 else if (cmsg->cmsg_level == SOL_SOCKET &&
1191 cmsg->cmsg_type == SCM_SECURITY) {
1192 label = (char*) CMSG_DATA(cmsg);
1193 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1194 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1195 cmsg->cmsg_type == SO_TIMESTAMP &&
1196 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1197 tv = (struct timeval*) CMSG_DATA(cmsg);
1198 else if (cmsg->cmsg_level == SOL_SOCKET &&
1199 cmsg->cmsg_type == SCM_RIGHTS) {
1200 fds = (int*) CMSG_DATA(cmsg);
1201 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1202 }
d025f1e4
ZJS
1203 }
1204
875c2e22
LP
1205 /* And a trailing NUL, just in case */
1206 s->buffer[n] = 0;
1207
f9a810be 1208 if (fd == s->syslog_fd) {
875c2e22 1209 if (n > 0 && n_fds == 0)
f9a810be 1210 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
875c2e22 1211 else if (n_fds > 0)
f9a810be 1212 log_warning("Got file descriptors via syslog socket. Ignoring.");
d025f1e4 1213
875c2e22 1214 } else if (fd == s->native_fd) {
f9a810be
LP
1215 if (n > 0 && n_fds == 0)
1216 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1217 else if (n == 0 && n_fds == 1)
1218 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1219 else if (n_fds > 0)
1220 log_warning("Got too many file descriptors via native socket. Ignoring.");
875c2e22
LP
1221
1222 } else {
1223 assert(fd == s->audit_fd);
1224
1225 if (n > 0 && n_fds == 0)
0b97208d 1226 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
875c2e22
LP
1227 else if (n_fds > 0)
1228 log_warning("Got file descriptors via audit socket. Ignoring.");
d025f1e4
ZJS
1229 }
1230
f9a810be
LP
1231 close_many(fds, n_fds);
1232 }
f9a810be 1233}
d025f1e4 1234
f9a810be
LP
1235static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1236 Server *s = userdata;
d025f1e4 1237
f9a810be 1238 assert(s);
d025f1e4 1239
f9a810be 1240 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
d025f1e4 1241
f9a810be
LP
1242 server_flush_to_var(s);
1243 server_sync(s);
3bfd4e0c 1244 server_vacuum(s);
d025f1e4 1245
74055aa7
LP
1246 touch("/run/systemd/journal/flushed");
1247
f9a810be
LP
1248 return 0;
1249}
d025f1e4 1250
f9a810be
LP
1251static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1252 Server *s = userdata;
d025f1e4 1253
f9a810be 1254 assert(s);
d025f1e4 1255
f9a810be
LP
1256 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1257 server_rotate(s);
1258 server_vacuum(s);
d025f1e4 1259
f9a810be
LP
1260 return 0;
1261}
d025f1e4 1262
f9a810be
LP
1263static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1264 Server *s = userdata;
d025f1e4 1265
f9a810be 1266 assert(s);
d025f1e4 1267
4daf54a8 1268 log_received_signal(LOG_INFO, si);
d025f1e4 1269
6203e07a 1270 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1271 return 0;
1272}
1273
f9a810be 1274static int setup_signals(Server *s) {
d025f1e4 1275 sigset_t mask;
f9a810be 1276 int r;
d025f1e4
ZJS
1277
1278 assert(s);
1279
1280 assert_se(sigemptyset(&mask) == 0);
1281 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1282 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1283
151b9b96 1284 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1285 if (r < 0)
1286 return r;
1287
151b9b96 1288 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1289 if (r < 0)
1290 return r;
d025f1e4 1291
151b9b96 1292 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1293 if (r < 0)
1294 return r;
d025f1e4 1295
151b9b96 1296 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1297 if (r < 0)
1298 return r;
d025f1e4
ZJS
1299
1300 return 0;
1301}
1302
1303static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1304 _cleanup_free_ char *line = NULL;
a2a5291b 1305 const char *w, *state;
d025f1e4 1306 size_t l;
74df0fca 1307 int r;
d025f1e4 1308
74df0fca 1309 r = proc_cmdline(&line);
b5884878 1310 if (r < 0) {
da927ba9 1311 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1312 return 0;
b5884878 1313 }
d025f1e4
ZJS
1314
1315 FOREACH_WORD_QUOTED(w, l, line, state) {
7fd1b19b 1316 _cleanup_free_ char *word;
d025f1e4
ZJS
1317
1318 word = strndup(w, l);
db91ea32
ZJS
1319 if (!word)
1320 return -ENOMEM;
d025f1e4
ZJS
1321
1322 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1323 r = parse_boolean(word + 35);
1324 if (r < 0)
1325 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1326 else
1327 s->forward_to_syslog = r;
1328 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1329 r = parse_boolean(word + 33);
1330 if (r < 0)
1331 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1332 else
1333 s->forward_to_kmsg = r;
1334 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1335 r = parse_boolean(word + 36);
1336 if (r < 0)
1337 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1338 else
1339 s->forward_to_console = r;
40b71e89
ST
1340 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1341 r = parse_boolean(word + 33);
1342 if (r < 0)
1343 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1344 else
1345 s->forward_to_wall = r;
d025f1e4
ZJS
1346 } else if (startswith(word, "systemd.journald"))
1347 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4 1348 }
b2fadec6 1349 /* do not warn about state here, since probably systemd already did */
d025f1e4 1350
db91ea32 1351 return 0;
d025f1e4
ZJS
1352}
1353
1354static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1355 assert(s);
1356
a9edaeff
JT
1357 return config_parse_many("/etc/systemd/journald.conf",
1358 CONF_DIRS_NULSTR("systemd/journald.conf"),
1359 "Journal\0",
1360 config_item_perf_lookup, journald_gperf_lookup,
1361 false, s);
d025f1e4
ZJS
1362}
1363
f9a810be
LP
1364static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1365 Server *s = userdata;
26687bf8
OS
1366
1367 assert(s);
1368
f9a810be 1369 server_sync(s);
26687bf8
OS
1370 return 0;
1371}
1372
d07f7b9e 1373int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1374 int r;
1375
26687bf8
OS
1376 assert(s);
1377
d07f7b9e
LP
1378 if (priority <= LOG_CRIT) {
1379 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1380 server_sync(s);
1381 return 0;
1382 }
1383
26687bf8
OS
1384 if (s->sync_scheduled)
1385 return 0;
1386
f9a810be
LP
1387 if (s->sync_interval_usec > 0) {
1388 usec_t when;
ca267016 1389
6a0f1f6d 1390 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1391 if (r < 0)
1392 return r;
26687bf8 1393
f9a810be
LP
1394 when += s->sync_interval_usec;
1395
1396 if (!s->sync_event_source) {
6a0f1f6d
LP
1397 r = sd_event_add_time(
1398 s->event,
1399 &s->sync_event_source,
1400 CLOCK_MONOTONIC,
1401 when, 0,
1402 server_dispatch_sync, s);
f9a810be
LP
1403 if (r < 0)
1404 return r;
1405
1406 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1407 } else {
1408 r = sd_event_source_set_time(s->sync_event_source, when);
1409 if (r < 0)
1410 return r;
1411
1412 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1413 }
26687bf8 1414 if (r < 0)
f9a810be 1415 return r;
26687bf8 1416
f9a810be
LP
1417 s->sync_scheduled = true;
1418 }
26687bf8
OS
1419
1420 return 0;
1421}
1422
0c24bb23
LP
1423static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1424 Server *s = userdata;
1425
1426 assert(s);
1427
1428 server_cache_hostname(s);
1429 return 0;
1430}
1431
1432static int server_open_hostname(Server *s) {
1433 int r;
1434
1435 assert(s);
1436
1437 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1438 if (s->hostname_fd < 0)
1439 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1440
151b9b96 1441 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1442 if (r < 0) {
28def94c
DR
1443 /* kernels prior to 3.2 don't support polling this file. Ignore
1444 * the failure. */
1445 if (r == -EPERM) {
1446 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1447 strerror(-r));
03e334a1 1448 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1449 return 0;
1450 }
1451
23bbb0de 1452 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1453 }
1454
1455 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1456 if (r < 0)
1457 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1458
1459 return 0;
1460}
1461
d025f1e4
ZJS
1462int server_init(Server *s) {
1463 int n, r, fd;
1464
1465 assert(s);
1466
1467 zero(*s);
875c2e22 1468 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
d025f1e4
ZJS
1469 s->compress = true;
1470 s->seal = true;
1471
26687bf8
OS
1472 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1473 s->sync_scheduled = false;
1474
d025f1e4
ZJS
1475 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1476 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1477
40b71e89 1478 s->forward_to_wall = true;
d025f1e4 1479
e150e820
MB
1480 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1481
d025f1e4
ZJS
1482 s->max_level_store = LOG_DEBUG;
1483 s->max_level_syslog = LOG_DEBUG;
1484 s->max_level_kmsg = LOG_NOTICE;
1485 s->max_level_console = LOG_INFO;
40b71e89 1486 s->max_level_wall = LOG_EMERG;
d025f1e4
ZJS
1487
1488 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1489 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1490
1491 server_parse_config_file(s);
1492 server_parse_proc_cmdline(s);
d288f79f 1493 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1494 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1495 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1496 s->rate_limit_interval = s->rate_limit_burst = 0;
1497 }
d025f1e4
ZJS
1498
1499 mkdir_p("/run/systemd/journal", 0755);
1500
43cf8388 1501 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1502 if (!s->user_journals)
1503 return log_oom();
1504
1505 s->mmap = mmap_cache_new();
1506 if (!s->mmap)
1507 return log_oom();
1508
f9a810be 1509 r = sd_event_default(&s->event);
23bbb0de
MS
1510 if (r < 0)
1511 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4 1512
f9a810be
LP
1513 sd_event_set_watchdog(s->event, true);
1514
d025f1e4 1515 n = sd_listen_fds(true);
23bbb0de
MS
1516 if (n < 0)
1517 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1518
1519 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1520
1521 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1522
1523 if (s->native_fd >= 0) {
1524 log_error("Too many native sockets passed.");
1525 return -EINVAL;
1526 }
1527
1528 s->native_fd = fd;
1529
1530 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1531
1532 if (s->stdout_fd >= 0) {
1533 log_error("Too many stdout sockets passed.");
1534 return -EINVAL;
1535 }
1536
1537 s->stdout_fd = fd;
1538
03ee5c38
LP
1539 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1540 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1541
1542 if (s->syslog_fd >= 0) {
1543 log_error("Too many /dev/log sockets passed.");
1544 return -EINVAL;
1545 }
1546
1547 s->syslog_fd = fd;
1548
875c2e22
LP
1549 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1550
1551 if (s->audit_fd >= 0) {
1552 log_error("Too many audit sockets passed.");
1553 return -EINVAL;
1554 }
1555
1556 s->audit_fd = fd;
1557
4ec3cd73
LP
1558 } else {
1559 log_warning("Unknown socket passed as file descriptor %d, ignoring.", fd);
1560
1561 /* Let's close the fd, better be safe than
1562 sorry. The fd might reference some resource
1563 that we really want to release if we don't
1564 make use of it. */
1565
1566 safe_close(fd);
1567 }
d025f1e4
ZJS
1568 }
1569
1570 r = server_open_syslog_socket(s);
1571 if (r < 0)
1572 return r;
1573
1574 r = server_open_native_socket(s);
1575 if (r < 0)
1576 return r;
1577
1578 r = server_open_stdout_socket(s);
1579 if (r < 0)
1580 return r;
1581
1582 r = server_open_dev_kmsg(s);
1583 if (r < 0)
1584 return r;
1585
875c2e22
LP
1586 r = server_open_audit(s);
1587 if (r < 0)
1588 return r;
1589
d025f1e4
ZJS
1590 r = server_open_kernel_seqnum(s);
1591 if (r < 0)
1592 return r;
1593
0c24bb23
LP
1594 r = server_open_hostname(s);
1595 if (r < 0)
1596 return r;
1597
f9a810be 1598 r = setup_signals(s);
d025f1e4
ZJS
1599 if (r < 0)
1600 return r;
1601
1602 s->udev = udev_new();
1603 if (!s->udev)
1604 return -ENOMEM;
1605
f9a810be 1606 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1607 if (!s->rate_limit)
1608 return -ENOMEM;
1609
e9174f29
LP
1610 r = cg_get_root_path(&s->cgroup_root);
1611 if (r < 0)
1612 return r;
1613
0c24bb23
LP
1614 server_cache_hostname(s);
1615 server_cache_boot_id(s);
1616 server_cache_machine_id(s);
1617
caa2f4c0 1618 r = system_journal_open(s, false);
d025f1e4
ZJS
1619 if (r < 0)
1620 return r;
1621
1622 return 0;
1623}
1624
1625void server_maybe_append_tags(Server *s) {
1626#ifdef HAVE_GCRYPT
1627 JournalFile *f;
1628 Iterator i;
1629 usec_t n;
1630
1631 n = now(CLOCK_REALTIME);
1632
1633 if (s->system_journal)
1634 journal_file_maybe_append_tag(s->system_journal, n);
1635
43cf8388 1636 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1637 journal_file_maybe_append_tag(f, n);
1638#endif
1639}
1640
1641void server_done(Server *s) {
1642 JournalFile *f;
1643 assert(s);
1644
1645 while (s->stdout_streams)
1646 stdout_stream_free(s->stdout_streams);
1647
1648 if (s->system_journal)
1649 journal_file_close(s->system_journal);
1650
1651 if (s->runtime_journal)
1652 journal_file_close(s->runtime_journal);
1653
43cf8388 1654 while ((f = ordered_hashmap_steal_first(s->user_journals)))
d025f1e4
ZJS
1655 journal_file_close(f);
1656
43cf8388 1657 ordered_hashmap_free(s->user_journals);
d025f1e4 1658
f9a810be
LP
1659 sd_event_source_unref(s->syslog_event_source);
1660 sd_event_source_unref(s->native_event_source);
1661 sd_event_source_unref(s->stdout_event_source);
1662 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1663 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1664 sd_event_source_unref(s->sync_event_source);
1665 sd_event_source_unref(s->sigusr1_event_source);
1666 sd_event_source_unref(s->sigusr2_event_source);
1667 sd_event_source_unref(s->sigterm_event_source);
1668 sd_event_source_unref(s->sigint_event_source);
0c24bb23 1669 sd_event_source_unref(s->hostname_event_source);
f9a810be 1670 sd_event_unref(s->event);
d025f1e4 1671
03e334a1
LP
1672 safe_close(s->syslog_fd);
1673 safe_close(s->native_fd);
1674 safe_close(s->stdout_fd);
1675 safe_close(s->dev_kmsg_fd);
875c2e22 1676 safe_close(s->audit_fd);
03e334a1 1677 safe_close(s->hostname_fd);
0c24bb23 1678
d025f1e4
ZJS
1679 if (s->rate_limit)
1680 journal_rate_limit_free(s->rate_limit);
1681
1682 if (s->kernel_seqnum)
1683 munmap(s->kernel_seqnum, sizeof(uint64_t));
1684
1685 free(s->buffer);
1686 free(s->tty_path);
e9174f29 1687 free(s->cgroup_root);
99d0966e 1688 free(s->hostname_field);
d025f1e4
ZJS
1689
1690 if (s->mmap)
1691 mmap_cache_unref(s->mmap);
1692
1693 if (s->udev)
1694 udev_unref(s->udev);
1695}