]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
journald: remove rotated file from hashmap when rotation fails
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/signalfd.h>
23#include <sys/ioctl.h>
24#include <linux/sockios.h>
25#include <sys/statvfs.h>
26#include <sys/mman.h>
26687bf8 27#include <sys/timerfd.h>
d025f1e4
ZJS
28
29#include <libudev.h>
30#include <systemd/sd-journal.h>
31#include <systemd/sd-messages.h>
32#include <systemd/sd-daemon.h>
33
a5c32cff 34#include "fileio.h"
d025f1e4
ZJS
35#include "mkdir.h"
36#include "hashmap.h"
37#include "journal-file.h"
38#include "socket-util.h"
39#include "cgroup-util.h"
40#include "list.h"
41#include "virt.h"
42#include "missing.h"
43#include "conf-parser.h"
44#include "journal-internal.h"
45#include "journal-vacuum.h"
46#include "journal-authenticate.h"
47#include "journald-server.h"
48#include "journald-rate-limit.h"
49#include "journald-kmsg.h"
50#include "journald-syslog.h"
51#include "journald-stream.h"
52#include "journald-console.h"
53#include "journald-native.h"
54
55#ifdef HAVE_ACL
56#include <sys/acl.h>
57#include <acl/libacl.h>
58#include "acl-util.h"
59#endif
60
61#ifdef HAVE_SELINUX
62#include <selinux/selinux.h>
63#endif
64
65#define USER_JOURNALS_MAX 1024
66
26687bf8 67#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
68#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69#define DEFAULT_RATE_LIMIT_BURST 1000
d025f1e4
ZJS
70
71#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
72
/* Names of the Storage enum values, indexed by enum value. */
static const char* const storage_table[] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Generate the string lookup helpers and the config parser callback
 * (config_parse_storage) for the Storage enum from the table above. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
82
/* Names of the SplitMode enum values, indexed by enum value. */
static const char* const split_mode_table[] = {
        [SPLIT_NONE] = "none",
        [SPLIT_UID] = "uid",
        [SPLIT_LOGIN] = "login"
};

/* Generate the string lookup helpers and the config parser callback
 * (config_parse_split_mode) for the SplitMode enum from the table above. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
91
670b110c 92static uint64_t available_space(Server *s, bool verbose) {
db91ea32 93 char ids[33];
7fd1b19b 94 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
95 sd_id128_t machine;
96 struct statvfs ss;
670b110c 97 uint64_t sum = 0, ss_avail = 0, avail = 0;
d025f1e4 98 int r;
7fd1b19b 99 _cleanup_closedir_ DIR *d = NULL;
d025f1e4 100 usec_t ts;
670b110c 101 const char *f;
d025f1e4
ZJS
102 JournalMetrics *m;
103
104 ts = now(CLOCK_MONOTONIC);
105
670b110c
ZJS
106 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
107 && !verbose)
d025f1e4
ZJS
108 return s->cached_available_space;
109
110 r = sd_id128_get_machine(&machine);
111 if (r < 0)
112 return 0;
113
114 if (s->system_journal) {
115 f = "/var/log/journal/";
116 m = &s->system_metrics;
117 } else {
118 f = "/run/log/journal/";
119 m = &s->runtime_metrics;
120 }
121
122 assert(m);
123
124 p = strappend(f, sd_id128_to_string(machine, ids));
125 if (!p)
126 return 0;
127
128 d = opendir(p);
d025f1e4
ZJS
129 if (!d)
130 return 0;
131
132 if (fstatvfs(dirfd(d), &ss) < 0)
db91ea32 133 return 0;
d025f1e4
ZJS
134
135 for (;;) {
136 struct stat st;
137 struct dirent *de;
138 union dirent_storage buf;
139
140 r = readdir_r(d, &buf.de, &de);
141 if (r != 0)
142 break;
143
144 if (!de)
145 break;
146
147 if (!endswith(de->d_name, ".journal") &&
148 !endswith(de->d_name, ".journal~"))
149 continue;
150
151 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
152 continue;
153
154 if (!S_ISREG(st.st_mode))
155 continue;
156
157 sum += (uint64_t) st.st_blocks * 512UL;
158 }
159
d025f1e4 160 ss_avail = ss.f_bsize * ss.f_bavail;
670b110c 161 avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
d025f1e4 162
670b110c 163 s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
d025f1e4
ZJS
164 s->cached_available_space_timestamp = ts;
165
670b110c
ZJS
166 if (verbose) {
167 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
168 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
169
170 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
171 "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
172 s->system_journal ? "Permanent" : "Runtime",
173 format_bytes(fb1, sizeof(fb1), sum),
174 format_bytes(fb2, sizeof(fb2), m->max_use),
175 format_bytes(fb3, sizeof(fb3), m->keep_free),
176 format_bytes(fb4, sizeof(fb4), ss_avail),
177 format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
178 }
179
180 return s->cached_available_space;
d025f1e4
ZJS
181}
182
d025f1e4
ZJS
183void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
184 int r;
185#ifdef HAVE_ACL
186 acl_t acl;
187 acl_entry_t entry;
188 acl_permset_t permset;
189#endif
190
191 assert(f);
192
4608af43 193 r = fchmod(f->fd, 0640);
d025f1e4 194 if (r < 0)
4608af43 195 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
d025f1e4
ZJS
196
197#ifdef HAVE_ACL
198 if (uid <= 0)
199 return;
200
201 acl = acl_get_fd(f->fd);
202 if (!acl) {
203 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
204 return;
205 }
206
207 r = acl_find_uid(acl, uid, &entry);
208 if (r <= 0) {
209
210 if (acl_create_entry(&acl, &entry) < 0 ||
211 acl_set_tag_type(entry, ACL_USER) < 0 ||
212 acl_set_qualifier(entry, &uid) < 0) {
213 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
214 goto finish;
215 }
216 }
217
23ad4dd8
JAS
218 /* We do not recalculate the mask unconditionally here,
219 * so that the fchmod() mask above stays intact. */
d025f1e4 220 if (acl_get_permset(entry, &permset) < 0 ||
23ad4dd8
JAS
221 acl_add_perm(permset, ACL_READ) < 0 ||
222 calc_acl_mask_if_needed(&acl) < 0) {
d025f1e4
ZJS
223 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
224 goto finish;
225 }
226
227 if (acl_set_fd(f->fd, acl) < 0)
228 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
229
230finish:
231 acl_free(acl);
232#endif
233}
234
235static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 236 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
237 int r;
238 JournalFile *f;
239 sd_id128_t machine;
240
241 assert(s);
242
243 /* We split up user logs only on /var, not on /run. If the
244 * runtime file is open, we write to it exclusively, in order
245 * to guarantee proper order as soon as we flush /run to
246 * /var and close the runtime file. */
247
248 if (s->runtime_journal)
249 return s->runtime_journal;
250
251 if (uid <= 0)
252 return s->system_journal;
253
254 r = sd_id128_get_machine(&machine);
255 if (r < 0)
256 return s->system_journal;
257
258 f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
259 if (f)
260 return f;
261
262 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-%lu.journal",
263 SD_ID128_FORMAT_VAL(machine), (unsigned long) uid) < 0)
264 return s->system_journal;
265
266 while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
267 /* Too many open? Then let's close one */
268 f = hashmap_steal_first(s->user_journals);
269 assert(f);
270 journal_file_close(f);
271 }
272
cbd67177 273 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
d025f1e4
ZJS
274 if (r < 0)
275 return s->system_journal;
276
277 server_fix_perms(s, f, uid);
278
279 r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
280 if (r < 0) {
281 journal_file_close(f);
282 return s->system_journal;
283 }
284
285 return f;
286}
287
288void server_rotate(Server *s) {
289 JournalFile *f;
290 void *k;
291 Iterator i;
292 int r;
293
294 log_debug("Rotating...");
295
296 if (s->runtime_journal) {
297 r = journal_file_rotate(&s->runtime_journal, s->compress, false);
298 if (r < 0)
299 if (s->runtime_journal)
300 log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r));
301 else
302 log_error("Failed to create new runtime journal: %s", strerror(-r));
303 else
304 server_fix_perms(s, s->runtime_journal, 0);
305 }
306
307 if (s->system_journal) {
308 r = journal_file_rotate(&s->system_journal, s->compress, s->seal);
309 if (r < 0)
310 if (s->system_journal)
311 log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r));
312 else
313 log_error("Failed to create new system journal: %s", strerror(-r));
314
315 else
316 server_fix_perms(s, s->system_journal, 0);
317 }
318
319 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
320 r = journal_file_rotate(&f, s->compress, s->seal);
321 if (r < 0)
7d73c134 322 if (f)
d025f1e4 323 log_error("Failed to rotate %s: %s", f->path, strerror(-r));
2b98f75a 324 else {
d025f1e4 325 log_error("Failed to create user journal: %s", strerror(-r));
2b98f75a
ZJS
326 hashmap_remove(s->user_journals, k);
327 }
d025f1e4
ZJS
328 else {
329 hashmap_replace(s->user_journals, k, f);
330 server_fix_perms(s, f, PTR_TO_UINT32(k));
331 }
332 }
333}
334
26687bf8 335void server_sync(Server *s) {
d07f7b9e 336 static const struct itimerspec sync_timer_disable = {};
26687bf8
OS
337 JournalFile *f;
338 void *k;
339 Iterator i;
340 int r;
341
26687bf8
OS
342 if (s->system_journal) {
343 r = journal_file_set_offline(s->system_journal);
344 if (r < 0)
345 log_error("Failed to sync system journal: %s", strerror(-r));
346 }
347
348 HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
349 r = journal_file_set_offline(f);
350 if (r < 0)
351 log_error("Failed to sync user journal: %s", strerror(-r));
352 }
353
354 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_disable, NULL);
355 if (r < 0)
356 log_error("Failed to disable max timer: %m");
357
358 s->sync_scheduled = false;
359}
360
d025f1e4 361void server_vacuum(Server *s) {
d025f1e4
ZJS
362 char ids[33];
363 sd_id128_t machine;
364 int r;
365
366 log_debug("Vacuuming...");
367
368 s->oldest_file_usec = 0;
369
370 r = sd_id128_get_machine(&machine);
371 if (r < 0) {
372 log_error("Failed to get machine ID: %s", strerror(-r));
373 return;
374 }
375
376 sd_id128_to_string(machine, ids);
377
378 if (s->system_journal) {
ed375beb 379 char *p = strappenda("/var/log/journal/", ids);
d025f1e4
ZJS
380
381 r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
382 if (r < 0 && r != -ENOENT)
383 log_error("Failed to vacuum %s: %s", p, strerror(-r));
d025f1e4
ZJS
384 }
385
386 if (s->runtime_journal) {
ed375beb 387 char *p = strappenda("/run/log/journal/", ids);
d025f1e4
ZJS
388
389 r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
390 if (r < 0 && r != -ENOENT)
391 log_error("Failed to vacuum %s: %s", p, strerror(-r));
d025f1e4
ZJS
392 }
393
394 s->cached_available_space_timestamp = 0;
395}
396
d025f1e4
ZJS
397bool shall_try_append_again(JournalFile *f, int r) {
398
399 /* -E2BIG Hit configured limit
400 -EFBIG Hit fs limit
401 -EDQUOT Quota limit hit
402 -ENOSPC Disk full
403 -EHOSTDOWN Other machine
404 -EBUSY Unclean shutdown
405 -EPROTONOSUPPORT Unsupported feature
406 -EBADMSG Corrupted
407 -ENODATA Truncated
408 -ESHUTDOWN Already archived */
409
410 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
411 log_debug("%s: Allocation limit reached, rotating.", f->path);
412 else if (r == -EHOSTDOWN)
413 log_info("%s: Journal file from other machine, rotating.", f->path);
414 else if (r == -EBUSY)
415 log_info("%s: Unclean shutdown, rotating.", f->path);
416 else if (r == -EPROTONOSUPPORT)
417 log_info("%s: Unsupported feature, rotating.", f->path);
418 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
419 log_warning("%s: Journal file corrupted, rotating.", f->path);
420 else
421 return false;
422
423 return true;
424}
425
d07f7b9e 426static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
427 JournalFile *f;
428 bool vacuumed = false;
429 int r;
430
431 assert(s);
432 assert(iovec);
433 assert(n > 0);
434
435 f = find_journal(s, uid);
436 if (!f)
437 return;
438
439 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
440 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
441 server_rotate(s);
442 server_vacuum(s);
443 vacuumed = true;
444
445 f = find_journal(s, uid);
446 if (!f)
447 return;
448 }
449
450 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 451 if (r >= 0) {
d07f7b9e 452 server_schedule_sync(s, priority);
d025f1e4 453 return;
26687bf8 454 }
d025f1e4
ZJS
455
456 if (vacuumed || !shall_try_append_again(f, r)) {
e40ec7ae
ZJS
457 size_t size = 0;
458 unsigned i;
459 for (i = 0; i < n; i++)
460 size += iovec[i].iov_len;
461
462 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
d025f1e4
ZJS
463 return;
464 }
465
466 server_rotate(s);
467 server_vacuum(s);
468
469 f = find_journal(s, uid);
470 if (!f)
471 return;
472
473 log_debug("Retrying write.");
474 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
e40ec7ae
ZJS
475 if (r < 0) {
476 size_t size = 0;
477 unsigned i;
478 for (i = 0; i < n; i++)
479 size += iovec[i].iov_len;
480
481 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
d07f7b9e
LP
482 } else
483 server_schedule_sync(s, priority);
d025f1e4
ZJS
484}
485
486static void dispatch_message_real(
487 Server *s,
488 struct iovec *iovec, unsigned n, unsigned m,
489 struct ucred *ucred,
490 struct timeval *tv,
491 const char *label, size_t label_len,
968f3196 492 const char *unit_id,
d07f7b9e 493 int priority,
968f3196 494 pid_t object_pid) {
d025f1e4 495
968f3196 496 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
497 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
498 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
499 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 500 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
adb435bb 501 boot_id[sizeof("_BOOT_ID=") + 32] = "_BOOT_ID=",
968f3196
ZJS
502 machine_id[sizeof("_MACHINE_ID=") + 32] = "_MACHINE_ID=",
503 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
504 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
505 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
506 uid_t object_uid;
507 gid_t object_gid;
968f3196 508 char *x;
d025f1e4
ZJS
509 sd_id128_t id;
510 int r;
ae018d9b 511 char *t, *c;
82499507
LP
512 uid_t realuid = 0, owner = 0, journal_uid;
513 bool owner_valid = false;
ae018d9b 514#ifdef HAVE_AUDIT
968f3196
ZJS
515 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
516 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
517 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
518 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
519
520 uint32_t audit;
521 uid_t loginuid;
522#endif
d025f1e4
ZJS
523
524 assert(s);
525 assert(iovec);
526 assert(n > 0);
968f3196 527 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
528
529 if (ucred) {
d025f1e4
ZJS
530 realuid = ucred->uid;
531
ae018d9b 532 sprintf(pid, "_PID=%lu", (unsigned long) ucred->pid);
c2457105 533 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 534
ae018d9b 535 sprintf(uid, "_UID=%lu", (unsigned long) ucred->uid);
c2457105 536 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 537
ae018d9b 538 sprintf(gid, "_GID=%lu", (unsigned long) ucred->gid);
c2457105 539 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
540
541 r = get_process_comm(ucred->pid, &t);
542 if (r >= 0) {
968f3196 543 x = strappenda("_COMM=", t);
d025f1e4 544 free(t);
968f3196 545 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
546 }
547
548 r = get_process_exe(ucred->pid, &t);
549 if (r >= 0) {
968f3196 550 x = strappenda("_EXE=", t);
d025f1e4 551 free(t);
968f3196 552 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
553 }
554
9bdbc2e2 555 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 556 if (r >= 0) {
968f3196 557 x = strappenda("_CMDLINE=", t);
d025f1e4 558 free(t);
3a832116
SL
559 IOVEC_SET_STRING(iovec[n++], x);
560 }
561
562 r = get_process_capeff(ucred->pid, &t);
563 if (r >= 0) {
564 x = strappenda("_CAP_EFFECTIVE=", t);
565 free(t);
968f3196 566 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
567 }
568
0a20e3c1 569#ifdef HAVE_AUDIT
d025f1e4 570 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b
LP
571 if (r >= 0) {
572 sprintf(audit_session, "_AUDIT_SESSION=%lu", (unsigned long) audit);
573 IOVEC_SET_STRING(iovec[n++], audit_session);
574 }
d025f1e4
ZJS
575
576 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 577 if (r >= 0) {
ae018d9b
LP
578 sprintf(audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
579 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 580 }
ae018d9b 581#endif
d025f1e4 582
ae018d9b 583 r = cg_pid_get_path_shifted(ucred->pid, NULL, &c);
7027ff61 584 if (r >= 0) {
968f3196
ZJS
585 char *session = NULL;
586
587 x = strappenda("_SYSTEMD_CGROUP=", c);
588 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 589
ae018d9b
LP
590 r = cg_path_get_session(c, &t);
591 if (r >= 0) {
592 session = strappenda("_SYSTEMD_SESSION=", t);
593 free(t);
d025f1e4 594 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
595 }
596
597 if (cg_path_get_owner_uid(c, &owner) >= 0) {
598 owner_valid = true;
d025f1e4 599
ae018d9b 600 sprintf(owner_uid, "_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
d025f1e4 601 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 602 }
d025f1e4 603
ae018d9b 604 if (cg_path_get_unit(c, &t) >= 0) {
968f3196 605 x = strappenda("_SYSTEMD_UNIT=", t);
ae018d9b 606 free(t);
19cace37
LP
607 IOVEC_SET_STRING(iovec[n++], x);
608 } else if (unit_id && !session) {
609 x = strappenda("_SYSTEMD_UNIT=", unit_id);
610 IOVEC_SET_STRING(iovec[n++], x);
611 }
612
613 if (cg_path_get_user_unit(c, &t) >= 0) {
968f3196 614 x = strappenda("_SYSTEMD_USER_UNIT=", t);
ae018d9b 615 free(t);
968f3196 616 IOVEC_SET_STRING(iovec[n++], x);
19cace37
LP
617 } else if (unit_id && session) {
618 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
619 IOVEC_SET_STRING(iovec[n++], x);
620 }
ae018d9b 621
0a244b8e
LP
622 if (cg_path_get_slice(c, &t) >= 0) {
623 x = strappenda("_SYSTEMD_SLICE=", t);
624 free(t);
625 IOVEC_SET_STRING(iovec[n++], x);
626 }
627
ae018d9b 628 free(c);
ef1673d1 629 }
d025f1e4 630
d025f1e4
ZJS
631#ifdef HAVE_SELINUX
632 if (label) {
968f3196 633 x = alloca(sizeof("_SELINUX_CONTEXT=") + label_len);
ae018d9b 634
968f3196
ZJS
635 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
636 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
637 } else {
638 security_context_t con;
639
640 if (getpidcon(ucred->pid, &con) >= 0) {
968f3196 641 x = strappenda("_SELINUX_CONTEXT=", con);
e7ff4e7f 642
d025f1e4 643 freecon(con);
968f3196 644 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
645 }
646 }
647#endif
648 }
968f3196
ZJS
649 assert(n <= m);
650
651 if (object_pid) {
652 r = get_process_uid(object_pid, &object_uid);
653 if (r >= 0) {
654 sprintf(o_uid, "OBJECT_UID=%lu", (unsigned long) object_uid);
655 IOVEC_SET_STRING(iovec[n++], o_uid);
656 }
657
658 r = get_process_gid(object_pid, &object_gid);
659 if (r >= 0) {
660 sprintf(o_gid, "OBJECT_GID=%lu", (unsigned long) object_gid);
661 IOVEC_SET_STRING(iovec[n++], o_gid);
662 }
663
664 r = get_process_comm(object_pid, &t);
665 if (r >= 0) {
666 x = strappenda("OBJECT_COMM=", t);
667 free(t);
668 IOVEC_SET_STRING(iovec[n++], x);
669 }
670
671 r = get_process_exe(object_pid, &t);
672 if (r >= 0) {
673 x = strappenda("OBJECT_EXE=", t);
674 free(t);
675 IOVEC_SET_STRING(iovec[n++], x);
676 }
677
678 r = get_process_cmdline(object_pid, 0, false, &t);
679 if (r >= 0) {
680 x = strappenda("OBJECT_CMDLINE=", t);
681 free(t);
682 IOVEC_SET_STRING(iovec[n++], x);
683 }
684
685#ifdef HAVE_AUDIT
686 r = audit_session_from_pid(object_pid, &audit);
687 if (r >= 0) {
688 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%lu", (unsigned long) audit);
689 IOVEC_SET_STRING(iovec[n++], o_audit_session);
690 }
691
692 r = audit_loginuid_from_pid(object_pid, &loginuid);
693 if (r >= 0) {
694 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID=%lu", (unsigned long) loginuid);
695 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
696 }
697#endif
698
699 r = cg_pid_get_path_shifted(object_pid, NULL, &c);
700 if (r >= 0) {
701 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
702 IOVEC_SET_STRING(iovec[n++], x);
703
704 r = cg_path_get_session(c, &t);
705 if (r >= 0) {
706 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
707 free(t);
708 IOVEC_SET_STRING(iovec[n++], x);
709 }
710
711 if (cg_path_get_owner_uid(c, &owner) >= 0) {
712 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID=%lu", (unsigned long) owner);
713 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
714 }
715
716 if (cg_path_get_unit(c, &t) >= 0) {
717 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
718 free(t);
19cace37
LP
719 IOVEC_SET_STRING(iovec[n++], x);
720 }
721
722 if (cg_path_get_user_unit(c, &t) >= 0) {
968f3196
ZJS
723 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
724 free(t);
968f3196 725 IOVEC_SET_STRING(iovec[n++], x);
19cace37 726 }
968f3196
ZJS
727
728 free(c);
729 }
730 }
731 assert(n <= m);
d025f1e4
ZJS
732
733 if (tv) {
ae018d9b 734 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 735 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
736 }
737
738 /* Note that strictly speaking storing the boot id here is
739 * redundant since the entry includes this in-line
740 * anyway. However, we need this indexed, too. */
741 r = sd_id128_get_boot(&id);
adb435bb 742 if (r >= 0) {
968f3196 743 sd_id128_to_string(id, boot_id + strlen("_BOOT_ID="));
adb435bb
LP
744 IOVEC_SET_STRING(iovec[n++], boot_id);
745 }
d025f1e4
ZJS
746
747 r = sd_id128_get_machine(&id);
adb435bb 748 if (r >= 0) {
968f3196 749 sd_id128_to_string(id, machine_id + strlen("_MACHINE_ID="));
adb435bb
LP
750 IOVEC_SET_STRING(iovec[n++], machine_id);
751 }
d025f1e4
ZJS
752
753 t = gethostname_malloc();
754 if (t) {
968f3196 755 x = strappenda("_HOSTNAME=", t);
d025f1e4 756 free(t);
968f3196 757 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
758 }
759
760 assert(n <= m);
761
da499392 762 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 763 /* Split up strictly by any UID */
759c945a 764 journal_uid = realuid;
82499507 765 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
40adcda8
LP
766 /* Split up by login UIDs, this avoids creation of
767 * individual journals for system UIDs. We do this
768 * only if the realuid is not root, in order not to
82499507
LP
769 * accidentally leak privileged information to the
770 * user that is logged by a privileged process that is
771 * part of an unprivileged session.*/
8a0889df 772 journal_uid = owner;
da499392
KS
773 else
774 journal_uid = 0;
759c945a 775
d07f7b9e 776 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
777}
778
779void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
780 char mid[11 + 32 + 1];
781 char buffer[16 + LINE_MAX + 1];
782 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
783 int n = 0;
784 va_list ap;
b92bea5d 785 struct ucred ucred = {};
d025f1e4
ZJS
786
787 assert(s);
788 assert(format);
789
790 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
791 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
792
793 memcpy(buffer, "MESSAGE=", 8);
794 va_start(ap, format);
795 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
796 va_end(ap);
797 char_array_0(buffer);
798 IOVEC_SET_STRING(iovec[n++], buffer);
799
800 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
801 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
802 char_array_0(mid);
803 IOVEC_SET_STRING(iovec[n++], mid);
804 }
805
d025f1e4
ZJS
806 ucred.pid = getpid();
807 ucred.uid = getuid();
808 ucred.gid = getgid();
809
d07f7b9e 810 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
d025f1e4
ZJS
811}
812
813void server_dispatch_message(
814 Server *s,
815 struct iovec *iovec, unsigned n, unsigned m,
816 struct ucred *ucred,
817 struct timeval *tv,
818 const char *label, size_t label_len,
819 const char *unit_id,
968f3196
ZJS
820 int priority,
821 pid_t object_pid) {
d025f1e4 822
7027ff61 823 int rl, r;
7fd1b19b 824 _cleanup_free_ char *path = NULL;
db91ea32 825 char *c;
d025f1e4
ZJS
826
827 assert(s);
828 assert(iovec || n == 0);
829
830 if (n == 0)
831 return;
832
833 if (LOG_PRI(priority) > s->max_level_store)
834 return;
835
2f5df74a
HHPF
836 /* Stop early in case the information will not be stored
837 * in a journal. */
838 if (s->storage == STORAGE_NONE)
839 return;
840
d025f1e4
ZJS
841 if (!ucred)
842 goto finish;
843
7027ff61
LP
844 r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
845 if (r < 0)
d025f1e4
ZJS
846 goto finish;
847
848 /* example: /user/lennart/3/foobar
849 * /system/dbus.service/foobar
850 *
851 * So let's cut of everything past the third /, since that is
852 * where user directories start */
853
854 c = strchr(path, '/');
855 if (c) {
856 c = strchr(c+1, '/');
857 if (c) {
858 c = strchr(c+1, '/');
859 if (c)
860 *c = 0;
861 }
862 }
863
db91ea32 864 rl = journal_rate_limit_test(s->rate_limit, path,
670b110c 865 priority & LOG_PRIMASK, available_space(s, false));
d025f1e4 866
db91ea32 867 if (rl == 0)
d025f1e4 868 return;
d025f1e4
ZJS
869
870 /* Write a suppression message if we suppressed something */
871 if (rl > 1)
db91ea32
ZJS
872 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
873 "Suppressed %u messages from %s", rl - 1, path);
d025f1e4
ZJS
874
875finish:
d07f7b9e 876 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
877}
878
879
880static int system_journal_open(Server *s) {
881 int r;
882 char *fn;
883 sd_id128_t machine;
884 char ids[33];
885
886 r = sd_id128_get_machine(&machine);
00a16861
OB
887 if (r < 0) {
888 log_error("Failed to get machine id: %s", strerror(-r));
d025f1e4 889 return r;
00a16861 890 }
d025f1e4
ZJS
891
892 sd_id128_to_string(machine, ids);
893
894 if (!s->system_journal &&
895 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
896 access("/run/systemd/journal/flushed", F_OK) >= 0) {
897
898 /* If in auto mode: first try to create the machine
899 * path, but not the prefix.
900 *
901 * If in persistent mode: create /var/log/journal and
902 * the machine path */
903
904 if (s->storage == STORAGE_PERSISTENT)
905 (void) mkdir("/var/log/journal/", 0755);
906
e40ec7ae 907 fn = strappenda("/var/log/journal/", ids);
d025f1e4 908 (void) mkdir(fn, 0755);
d025f1e4 909
e40ec7ae 910 fn = strappenda(fn, "/system.journal");
d025f1e4 911 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
d025f1e4 912
670b110c 913 if (r >= 0)
d025f1e4 914 server_fix_perms(s, s->system_journal, 0);
433dd100
LN
915 else if (r < 0) {
916 if (r != -ENOENT && r != -EROFS)
917 log_warning("Failed to open system journal: %s", strerror(-r));
e40ec7ae 918
433dd100
LN
919 r = 0;
920 }
d025f1e4
ZJS
921 }
922
923 if (!s->runtime_journal &&
924 (s->storage != STORAGE_NONE)) {
925
926 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
927 if (!fn)
928 return -ENOMEM;
929
930 if (s->system_journal) {
931
932 /* Try to open the runtime journal, but only
933 * if it already exists, so that we can flush
934 * it into the system journal */
935
936 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
937 free(fn);
938
939 if (r < 0) {
940 if (r != -ENOENT)
941 log_warning("Failed to open runtime journal: %s", strerror(-r));
942
943 r = 0;
944 }
945
946 } else {
947
948 /* OK, we really need the runtime journal, so create
949 * it if necessary. */
950
951 (void) mkdir_parents(fn, 0755);
952 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
953 free(fn);
954
955 if (r < 0) {
956 log_error("Failed to open runtime journal: %s", strerror(-r));
957 return r;
958 }
959 }
960
670b110c 961 if (s->runtime_journal)
d025f1e4 962 server_fix_perms(s, s->runtime_journal, 0);
d025f1e4
ZJS
963 }
964
670b110c
ZJS
965 available_space(s, true);
966
d025f1e4
ZJS
967 return r;
968}
969
970int server_flush_to_var(Server *s) {
971 int r;
972 sd_id128_t machine;
973 sd_journal *j = NULL;
974
975 assert(s);
976
977 if (s->storage != STORAGE_AUTO &&
978 s->storage != STORAGE_PERSISTENT)
979 return 0;
980
981 if (!s->runtime_journal)
982 return 0;
983
984 system_journal_open(s);
985
986 if (!s->system_journal)
987 return 0;
988
989 log_debug("Flushing to /var...");
990
991 r = sd_id128_get_machine(&machine);
00a16861 992 if (r < 0)
d025f1e4 993 return r;
d025f1e4
ZJS
994
995 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
996 if (r < 0) {
997 log_error("Failed to read runtime journal: %s", strerror(-r));
998 return r;
999 }
1000
93b73b06
LP
1001 sd_journal_set_data_threshold(j, 0);
1002
d025f1e4
ZJS
1003 SD_JOURNAL_FOREACH(j) {
1004 Object *o = NULL;
1005 JournalFile *f;
1006
1007 f = j->current_file;
1008 assert(f && f->current_offset > 0);
1009
1010 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1011 if (r < 0) {
1012 log_error("Can't read entry: %s", strerror(-r));
1013 goto finish;
1014 }
1015
1016 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1017 if (r >= 0)
1018 continue;
1019
1020 if (!shall_try_append_again(s->system_journal, r)) {
1021 log_error("Can't write entry: %s", strerror(-r));
1022 goto finish;
1023 }
1024
1025 server_rotate(s);
1026 server_vacuum(s);
1027
253f59df
LP
1028 if (!s->system_journal) {
1029 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1030 r = -EIO;
1031 goto finish;
1032 }
1033
d025f1e4
ZJS
1034 log_debug("Retrying write.");
1035 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1036 if (r < 0) {
1037 log_error("Can't write entry: %s", strerror(-r));
1038 goto finish;
1039 }
1040 }
1041
1042finish:
1043 journal_file_post_change(s->system_journal);
1044
1045 journal_file_close(s->runtime_journal);
1046 s->runtime_journal = NULL;
1047
1048 if (r >= 0)
1049 rm_rf("/run/log/journal", false, true, false);
1050
763c7aa2 1051 sd_journal_close(j);
d025f1e4
ZJS
1052
1053 return r;
1054}
1055
1056int process_event(Server *s, struct epoll_event *ev) {
1057 assert(s);
1058 assert(ev);
1059
1060 if (ev->data.fd == s->signal_fd) {
1061 struct signalfd_siginfo sfsi;
1062 ssize_t n;
1063
1064 if (ev->events != EPOLLIN) {
5843c5eb
ZJS
1065 log_error("Got invalid event from epoll for %s: %"PRIx32,
1066 "signal fd", ev->events);
d025f1e4
ZJS
1067 return -EIO;
1068 }
1069
1070 n = read(s->signal_fd, &sfsi, sizeof(sfsi));
1071 if (n != sizeof(sfsi)) {
1072
1073 if (n >= 0)
1074 return -EIO;
1075
1076 if (errno == EINTR || errno == EAGAIN)
1077 return 1;
1078
1079 return -errno;
1080 }
1081
d025f1e4 1082 if (sfsi.ssi_signo == SIGUSR1) {
289f910e
ZJS
1083 log_info("Received request to flush runtime journal from PID %"PRIu32,
1084 sfsi.ssi_pid);
d025f1e4
ZJS
1085 touch("/run/systemd/journal/flushed");
1086 server_flush_to_var(s);
26687bf8 1087 server_sync(s);
d025f1e4
ZJS
1088 return 1;
1089 }
1090
1091 if (sfsi.ssi_signo == SIGUSR2) {
289f910e
ZJS
1092 log_info("Received request to rotate journal from PID %"PRIu32,
1093 sfsi.ssi_pid);
d025f1e4
ZJS
1094 server_rotate(s);
1095 server_vacuum(s);
1096 return 1;
1097 }
1098
26687bf8
OS
1099 log_info("Received SIG%s", signal_to_string(sfsi.ssi_signo));
1100
d025f1e4
ZJS
1101 return 0;
1102
26687bf8
OS
1103 } else if (ev->data.fd == s->sync_timer_fd) {
1104 int r;
1105 uint64_t t;
1106
1107 log_debug("Got sync request from epoll.");
1108
1109 r = read(ev->data.fd, (void *)&t, sizeof(t));
1110 if (r < 0)
1111 return 0;
1112
1113 server_sync(s);
1114 return 1;
1115
d025f1e4
ZJS
1116 } else if (ev->data.fd == s->dev_kmsg_fd) {
1117 int r;
1118
5843c5eb
ZJS
1119 if (ev->events & EPOLLERR)
1120 log_warning("/dev/kmsg buffer overrun, some messages lost.");
1121
1122 if (!(ev->events & EPOLLIN)) {
1123 log_error("Got invalid event from epoll for %s: %"PRIx32,
1124 "/dev/kmsg", ev->events);
d025f1e4
ZJS
1125 return -EIO;
1126 }
1127
1128 r = server_read_dev_kmsg(s);
1129 if (r < 0)
1130 return r;
1131
1132 return 1;
1133
1134 } else if (ev->data.fd == s->native_fd ||
1135 ev->data.fd == s->syslog_fd) {
1136
1137 if (ev->events != EPOLLIN) {
5843c5eb
ZJS
1138 log_error("Got invalid event from epoll for %s: %"PRIx32,
1139 ev->data.fd == s->native_fd ? "native fd" : "syslog fd",
1140 ev->events);
d025f1e4
ZJS
1141 return -EIO;
1142 }
1143
1144 for (;;) {
1145 struct msghdr msghdr;
1146 struct iovec iovec;
1147 struct ucred *ucred = NULL;
1148 struct timeval *tv = NULL;
1149 struct cmsghdr *cmsg;
1150 char *label = NULL;
1151 size_t label_len = 0;
1152 union {
1153 struct cmsghdr cmsghdr;
1154
1155 /* We use NAME_MAX space for the
1156 * SELinux label here. The kernel
1157 * currently enforces no limit, but
1158 * according to suggestions from the
1159 * SELinux people this will change and
1160 * it will probably be identical to
1161 * NAME_MAX. For now we use that, but
1162 * this should be updated one day when
1163 * the final limit is known.*/
1164 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1165 CMSG_SPACE(sizeof(struct timeval)) +
1166 CMSG_SPACE(sizeof(int)) + /* fd */
1167 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1168 } control;
1169 ssize_t n;
1170 int v;
1171 int *fds = NULL;
1172 unsigned n_fds = 0;
1173
1174 if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) {
1175 log_error("SIOCINQ failed: %m");
1176 return -errno;
1177 }
1178
1179 if (s->buffer_size < (size_t) v) {
1180 void *b;
1181 size_t l;
1182
1183 l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2);
1184 b = realloc(s->buffer, l+1);
1185
1186 if (!b) {
1187 log_error("Couldn't increase buffer.");
1188 return -ENOMEM;
1189 }
1190
1191 s->buffer_size = l;
1192 s->buffer = b;
1193 }
1194
1195 zero(iovec);
1196 iovec.iov_base = s->buffer;
1197 iovec.iov_len = s->buffer_size;
1198
1199 zero(control);
1200 zero(msghdr);
1201 msghdr.msg_iov = &iovec;
1202 msghdr.msg_iovlen = 1;
1203 msghdr.msg_control = &control;
1204 msghdr.msg_controllen = sizeof(control);
1205
1206 n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1207 if (n < 0) {
1208
1209 if (errno == EINTR || errno == EAGAIN)
1210 return 1;
1211
1212 log_error("recvmsg() failed: %m");
1213 return -errno;
1214 }
1215
1216 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1217
1218 if (cmsg->cmsg_level == SOL_SOCKET &&
1219 cmsg->cmsg_type == SCM_CREDENTIALS &&
1220 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1221 ucred = (struct ucred*) CMSG_DATA(cmsg);
1222 else if (cmsg->cmsg_level == SOL_SOCKET &&
1223 cmsg->cmsg_type == SCM_SECURITY) {
1224 label = (char*) CMSG_DATA(cmsg);
1225 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1226 } else if (cmsg->cmsg_level == SOL_SOCKET &&
670b110c
ZJS
1227 cmsg->cmsg_type == SO_TIMESTAMP &&
1228 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
d025f1e4
ZJS
1229 tv = (struct timeval*) CMSG_DATA(cmsg);
1230 else if (cmsg->cmsg_level == SOL_SOCKET &&
1231 cmsg->cmsg_type == SCM_RIGHTS) {
1232 fds = (int*) CMSG_DATA(cmsg);
1233 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1234 }
1235 }
1236
1237 if (ev->data.fd == s->syslog_fd) {
d025f1e4 1238 if (n > 0 && n_fds == 0) {
04fefcdd 1239 s->buffer[n] = 0;
d025f1e4
ZJS
1240 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1241 } else if (n_fds > 0)
1242 log_warning("Got file descriptors via syslog socket. Ignoring.");
1243
1244 } else {
1245 if (n > 0 && n_fds == 0)
1246 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1247 else if (n == 0 && n_fds == 1)
1248 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1249 else if (n_fds > 0)
1250 log_warning("Got too many file descriptors via native socket. Ignoring.");
1251 }
1252
1253 close_many(fds, n_fds);
1254 }
1255
1256 return 1;
1257
1258 } else if (ev->data.fd == s->stdout_fd) {
1259
1260 if (ev->events != EPOLLIN) {
5843c5eb
ZJS
1261 log_error("Got invalid event from epoll for %s: %"PRIx32,
1262 "stdout fd", ev->events);
d025f1e4
ZJS
1263 return -EIO;
1264 }
1265
1266 stdout_stream_new(s);
1267 return 1;
1268
1269 } else {
1270 StdoutStream *stream;
1271
1272 if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
5843c5eb
ZJS
1273 log_error("Got invalid event from epoll for %s: %"PRIx32,
1274 "stdout stream", ev->events);
d025f1e4
ZJS
1275 return -EIO;
1276 }
1277
1278 /* If it is none of the well-known fds, it must be an
1279 * stdout stream fd. Note that this is a bit ugly here
1280 * (since we rely that none of the well-known fds
1281 * could be interpreted as pointer), but nonetheless
1282 * safe, since the well-known fds would never get an
1283 * fd > 4096, i.e. beyond the first memory page */
1284
1285 stream = ev->data.ptr;
1286
1287 if (stdout_stream_process(stream) <= 0)
1288 stdout_stream_free(stream);
1289
1290 return 1;
1291 }
1292
1293 log_error("Unknown event.");
1294 return 0;
1295}
1296
1297static int open_signalfd(Server *s) {
1298 sigset_t mask;
1299 struct epoll_event ev;
1300
1301 assert(s);
1302
1303 assert_se(sigemptyset(&mask) == 0);
1304 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1305 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1306
1307 s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
1308 if (s->signal_fd < 0) {
1309 log_error("signalfd(): %m");
1310 return -errno;
1311 }
1312
1313 zero(ev);
1314 ev.events = EPOLLIN;
1315 ev.data.fd = s->signal_fd;
1316
1317 if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) {
1318 log_error("epoll_ctl(): %m");
1319 return -errno;
1320 }
1321
1322 return 0;
1323}
1324
1325static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1326 _cleanup_free_ char *line = NULL;
db91ea32 1327 char *w, *state;
d025f1e4
ZJS
1328 int r;
1329 size_t l;
1330
1331 if (detect_container(NULL) > 0)
1332 return 0;
1333
1334 r = read_one_line_file("/proc/cmdline", &line);
1335 if (r < 0) {
1336 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
1337 return 0;
1338 }
1339
1340 FOREACH_WORD_QUOTED(w, l, line, state) {
7fd1b19b 1341 _cleanup_free_ char *word;
d025f1e4
ZJS
1342
1343 word = strndup(w, l);
db91ea32
ZJS
1344 if (!word)
1345 return -ENOMEM;
d025f1e4
ZJS
1346
1347 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1348 r = parse_boolean(word + 35);
1349 if (r < 0)
1350 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1351 else
1352 s->forward_to_syslog = r;
1353 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1354 r = parse_boolean(word + 33);
1355 if (r < 0)
1356 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1357 else
1358 s->forward_to_kmsg = r;
1359 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1360 r = parse_boolean(word + 36);
1361 if (r < 0)
1362 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1363 else
1364 s->forward_to_console = r;
1365 } else if (startswith(word, "systemd.journald"))
1366 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4
ZJS
1367 }
1368
db91ea32 1369 return 0;
d025f1e4
ZJS
1370}
1371
1372static int server_parse_config_file(Server *s) {
db5c0122 1373 static const char fn[] = "/etc/systemd/journald.conf";
7fd1b19b 1374 _cleanup_fclose_ FILE *f = NULL;
d025f1e4
ZJS
1375 int r;
1376
1377 assert(s);
1378
d025f1e4
ZJS
1379 f = fopen(fn, "re");
1380 if (!f) {
1381 if (errno == ENOENT)
1382 return 0;
1383
1384 log_warning("Failed to open configuration file %s: %m", fn);
1385 return -errno;
1386 }
1387
e8e581bf 1388 r = config_parse(NULL, fn, f, "Journal\0", config_item_perf_lookup,
db5c0122 1389 (void*) journald_gperf_lookup, false, false, s);
d025f1e4
ZJS
1390 if (r < 0)
1391 log_warning("Failed to parse configuration file: %s", strerror(-r));
1392
d025f1e4
ZJS
1393 return r;
1394}
1395
26687bf8
OS
1396static int server_open_sync_timer(Server *s) {
1397 int r;
1398 struct epoll_event ev;
1399
1400 assert(s);
1401
1402 s->sync_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
1403 if (s->sync_timer_fd < 0)
1404 return -errno;
1405
1406 zero(ev);
1407 ev.events = EPOLLIN;
1408 ev.data.fd = s->sync_timer_fd;
1409
1410 r = epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->sync_timer_fd, &ev);
1411 if (r < 0) {
1412 log_error("Failed to add idle timer fd to epoll object: %m");
1413 return -errno;
1414 }
1415
1416 return 0;
1417}
1418
d07f7b9e 1419int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1420 int r;
1421
26687bf8
OS
1422 assert(s);
1423
d07f7b9e
LP
1424 if (priority <= LOG_CRIT) {
1425 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1426 server_sync(s);
1427 return 0;
1428 }
1429
26687bf8
OS
1430 if (s->sync_scheduled)
1431 return 0;
1432
1433 if (s->sync_interval_usec) {
ca267016
MB
1434 struct itimerspec sync_timer_enable = {};
1435
1436 timespec_store(&sync_timer_enable.it_value, s->sync_interval_usec);
26687bf8
OS
1437
1438 r = timerfd_settime(s->sync_timer_fd, 0, &sync_timer_enable, NULL);
1439 if (r < 0)
1440 return -errno;
1441 }
1442
1443 s->sync_scheduled = true;
1444
1445 return 0;
1446}
1447
d025f1e4
ZJS
1448int server_init(Server *s) {
1449 int n, r, fd;
1450
1451 assert(s);
1452
1453 zero(*s);
26687bf8 1454 s->sync_timer_fd = s->syslog_fd = s->native_fd = s->stdout_fd =
670b110c 1455 s->signal_fd = s->epoll_fd = s->dev_kmsg_fd = -1;
d025f1e4
ZJS
1456 s->compress = true;
1457 s->seal = true;
1458
26687bf8
OS
1459 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460 s->sync_scheduled = false;
1461
d025f1e4
ZJS
1462 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1464
1465 s->forward_to_syslog = true;
1466
1467 s->max_level_store = LOG_DEBUG;
1468 s->max_level_syslog = LOG_DEBUG;
1469 s->max_level_kmsg = LOG_NOTICE;
1470 s->max_level_console = LOG_INFO;
1471
1472 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1473 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1474
1475 server_parse_config_file(s);
1476 server_parse_proc_cmdline(s);
d288f79f
ZJS
1477 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1478 log_debug("Setting both rate limit interval and burst from %llu,%u to 0,0",
1479 (long long unsigned) s->rate_limit_interval,
1480 s->rate_limit_burst);
1481 s->rate_limit_interval = s->rate_limit_burst = 0;
1482 }
d025f1e4
ZJS
1483
1484 mkdir_p("/run/systemd/journal", 0755);
1485
1486 s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func);
1487 if (!s->user_journals)
1488 return log_oom();
1489
1490 s->mmap = mmap_cache_new();
1491 if (!s->mmap)
1492 return log_oom();
1493
1494 s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
1495 if (s->epoll_fd < 0) {
1496 log_error("Failed to create epoll object: %m");
1497 return -errno;
1498 }
1499
1500 n = sd_listen_fds(true);
1501 if (n < 0) {
1502 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1503 return n;
1504 }
1505
1506 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1507
1508 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1509
1510 if (s->native_fd >= 0) {
1511 log_error("Too many native sockets passed.");
1512 return -EINVAL;
1513 }
1514
1515 s->native_fd = fd;
1516
1517 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1518
1519 if (s->stdout_fd >= 0) {
1520 log_error("Too many stdout sockets passed.");
1521 return -EINVAL;
1522 }
1523
1524 s->stdout_fd = fd;
1525
1526 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) {
1527
1528 if (s->syslog_fd >= 0) {
1529 log_error("Too many /dev/log sockets passed.");
1530 return -EINVAL;
1531 }
1532
1533 s->syslog_fd = fd;
1534
1535 } else {
1536 log_error("Unknown socket passed.");
1537 return -EINVAL;
1538 }
1539 }
1540
1541 r = server_open_syslog_socket(s);
1542 if (r < 0)
1543 return r;
1544
1545 r = server_open_native_socket(s);
1546 if (r < 0)
1547 return r;
1548
1549 r = server_open_stdout_socket(s);
1550 if (r < 0)
1551 return r;
1552
1553 r = server_open_dev_kmsg(s);
1554 if (r < 0)
1555 return r;
1556
1557 r = server_open_kernel_seqnum(s);
1558 if (r < 0)
1559 return r;
1560
26687bf8
OS
1561 r = server_open_sync_timer(s);
1562 if (r < 0)
1563 return r;
1564
d025f1e4
ZJS
1565 r = open_signalfd(s);
1566 if (r < 0)
1567 return r;
1568
1569 s->udev = udev_new();
1570 if (!s->udev)
1571 return -ENOMEM;
1572
d288f79f
ZJS
1573 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval,
1574 s->rate_limit_burst);
d025f1e4
ZJS
1575 if (!s->rate_limit)
1576 return -ENOMEM;
1577
1578 r = system_journal_open(s);
1579 if (r < 0)
1580 return r;
1581
1582 return 0;
1583}
1584
1585void server_maybe_append_tags(Server *s) {
1586#ifdef HAVE_GCRYPT
1587 JournalFile *f;
1588 Iterator i;
1589 usec_t n;
1590
1591 n = now(CLOCK_REALTIME);
1592
1593 if (s->system_journal)
1594 journal_file_maybe_append_tag(s->system_journal, n);
1595
1596 HASHMAP_FOREACH(f, s->user_journals, i)
1597 journal_file_maybe_append_tag(f, n);
1598#endif
1599}
1600
1601void server_done(Server *s) {
1602 JournalFile *f;
1603 assert(s);
1604
1605 while (s->stdout_streams)
1606 stdout_stream_free(s->stdout_streams);
1607
1608 if (s->system_journal)
1609 journal_file_close(s->system_journal);
1610
1611 if (s->runtime_journal)
1612 journal_file_close(s->runtime_journal);
1613
1614 while ((f = hashmap_steal_first(s->user_journals)))
1615 journal_file_close(f);
1616
1617 hashmap_free(s->user_journals);
1618
1619 if (s->epoll_fd >= 0)
1620 close_nointr_nofail(s->epoll_fd);
1621
1622 if (s->signal_fd >= 0)
1623 close_nointr_nofail(s->signal_fd);
1624
1625 if (s->syslog_fd >= 0)
1626 close_nointr_nofail(s->syslog_fd);
1627
1628 if (s->native_fd >= 0)
1629 close_nointr_nofail(s->native_fd);
1630
1631 if (s->stdout_fd >= 0)
1632 close_nointr_nofail(s->stdout_fd);
1633
1634 if (s->dev_kmsg_fd >= 0)
1635 close_nointr_nofail(s->dev_kmsg_fd);
1636
26687bf8
OS
1637 if (s->sync_timer_fd >= 0)
1638 close_nointr_nofail(s->sync_timer_fd);
1639
d025f1e4
ZJS
1640 if (s->rate_limit)
1641 journal_rate_limit_free(s->rate_limit);
1642
1643 if (s->kernel_seqnum)
1644 munmap(s->kernel_seqnum, sizeof(uint64_t));
1645
1646 free(s->buffer);
1647 free(s->tty_path);
1648
1649 if (s->mmap)
1650 mmap_cache_unref(s->mmap);
1651
1652 if (s->udev)
1653 udev_unref(s->udev);
1654}