]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
Merge pull request #147 from poettering/cmsg
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/signalfd.h>
23#include <sys/ioctl.h>
24#include <linux/sockios.h>
25#include <sys/statvfs.h>
26#include <sys/mman.h>
27
24882e06
LP
28#ifdef HAVE_SELINUX
29#include <selinux/selinux.h>
30#endif
31
d025f1e4 32#include <libudev.h>
d025f1e4 33
74df0fca
LP
34#include "sd-journal.h"
35#include "sd-messages.h"
36#include "sd-daemon.h"
d025f1e4 37#include "mkdir.h"
c6878637 38#include "rm-rf.h"
d025f1e4
ZJS
39#include "hashmap.h"
40#include "journal-file.h"
41#include "socket-util.h"
42#include "cgroup-util.h"
d025f1e4
ZJS
43#include "missing.h"
44#include "conf-parser.h"
74df0fca 45#include "selinux-util.h"
958b66ea
LP
46#include "acl-util.h"
47#include "formats-util.h"
48#include "process-util.h"
49#include "hostname-util.h"
24882e06 50#include "signal-util.h"
d025f1e4
ZJS
51#include "journal-internal.h"
52#include "journal-vacuum.h"
53#include "journal-authenticate.h"
d025f1e4
ZJS
54#include "journald-rate-limit.h"
55#include "journald-kmsg.h"
56#include "journald-syslog.h"
57#include "journald-stream.h"
d025f1e4 58#include "journald-native.h"
875c2e22 59#include "journald-audit.h"
74df0fca 60#include "journald-server.h"
d025f1e4 61
d025f1e4
ZJS
/* Upper bound on the number of per-user journal files kept open at once. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults (presumably overridable via configuration -- the
 * consumers are not all visible in this part of the file). */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a computed "available space" value may be served from cache. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
70
2c5859af 71static const char* const storage_table[_STORAGE_MAX] = {
d025f1e4
ZJS
72 [STORAGE_AUTO] = "auto",
73 [STORAGE_VOLATILE] = "volatile",
74 [STORAGE_PERSISTENT] = "persistent",
75 [STORAGE_NONE] = "none"
76};
77
78DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
79DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
80
2c5859af
DM
81static const char* const split_mode_table[_SPLIT_MAX] = {
82 [SPLIT_LOGIN] = "login",
d025f1e4 83 [SPLIT_UID] = "uid",
2c5859af 84 [SPLIT_NONE] = "none",
d025f1e4
ZJS
85};
86
87DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
88DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
89
670b110c 90static uint64_t available_space(Server *s, bool verbose) {
db91ea32 91 char ids[33];
7fd1b19b 92 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
93 sd_id128_t machine;
94 struct statvfs ss;
670b110c 95 uint64_t sum = 0, ss_avail = 0, avail = 0;
d025f1e4 96 int r;
7fd1b19b 97 _cleanup_closedir_ DIR *d = NULL;
d025f1e4 98 usec_t ts;
670b110c 99 const char *f;
d025f1e4
ZJS
100 JournalMetrics *m;
101
102 ts = now(CLOCK_MONOTONIC);
103
670b110c
ZJS
104 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
105 && !verbose)
d025f1e4
ZJS
106 return s->cached_available_space;
107
108 r = sd_id128_get_machine(&machine);
109 if (r < 0)
110 return 0;
111
112 if (s->system_journal) {
113 f = "/var/log/journal/";
114 m = &s->system_metrics;
115 } else {
116 f = "/run/log/journal/";
117 m = &s->runtime_metrics;
118 }
119
120 assert(m);
121
122 p = strappend(f, sd_id128_to_string(machine, ids));
123 if (!p)
124 return 0;
125
126 d = opendir(p);
d025f1e4
ZJS
127 if (!d)
128 return 0;
129
130 if (fstatvfs(dirfd(d), &ss) < 0)
db91ea32 131 return 0;
d025f1e4
ZJS
132
133 for (;;) {
134 struct stat st;
135 struct dirent *de;
d025f1e4 136
0371ca0d
FW
137 errno = 0;
138 de = readdir(d);
139 if (!de && errno != 0)
140 return 0;
d025f1e4
ZJS
141
142 if (!de)
143 break;
144
145 if (!endswith(de->d_name, ".journal") &&
146 !endswith(de->d_name, ".journal~"))
147 continue;
148
149 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
150 continue;
151
152 if (!S_ISREG(st.st_mode))
153 continue;
154
155 sum += (uint64_t) st.st_blocks * 512UL;
156 }
157
d025f1e4
ZJS
158 ss_avail = ss.f_bsize * ss.f_bavail;
159
348ced90
ZJS
160 /* If we reached a high mark, we will always allow this much
161 * again, unless usage goes above max_use. This watermark
162 * value is cached so that we don't give up space on pressure,
163 * but hover below the maximum usage. */
164
165 if (m->use < sum)
166 m->use = sum;
167
168 avail = LESS_BY(ss_avail, m->keep_free);
169
170 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
d025f1e4
ZJS
171 s->cached_available_space_timestamp = ts;
172
670b110c
ZJS
173 if (verbose) {
174 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
175 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
176
177 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
348ced90
ZJS
178 "%s journal is using %s (max allowed %s, "
179 "trying to leave %s free of %s available → current limit %s).",
670b110c
ZJS
180 s->system_journal ? "Permanent" : "Runtime",
181 format_bytes(fb1, sizeof(fb1), sum),
182 format_bytes(fb2, sizeof(fb2), m->max_use),
183 format_bytes(fb3, sizeof(fb3), m->keep_free),
184 format_bytes(fb4, sizeof(fb4), ss_avail),
348ced90 185 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
670b110c
ZJS
186 }
187
188 return s->cached_available_space;
d025f1e4
ZJS
189}
190
d025f1e4
ZJS
191void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
192 int r;
193#ifdef HAVE_ACL
194 acl_t acl;
195 acl_entry_t entry;
196 acl_permset_t permset;
197#endif
198
199 assert(f);
200
4608af43 201 r = fchmod(f->fd, 0640);
d025f1e4 202 if (r < 0)
da927ba9 203 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
d025f1e4
ZJS
204
205#ifdef HAVE_ACL
34c10968 206 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
207 return;
208
209 acl = acl_get_fd(f->fd);
210 if (!acl) {
56f64d95 211 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
212 return;
213 }
214
215 r = acl_find_uid(acl, uid, &entry);
216 if (r <= 0) {
217
218 if (acl_create_entry(&acl, &entry) < 0 ||
219 acl_set_tag_type(entry, ACL_USER) < 0 ||
220 acl_set_qualifier(entry, &uid) < 0) {
56f64d95 221 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
222 goto finish;
223 }
224 }
225
23ad4dd8
JAS
226 /* We do not recalculate the mask unconditionally here,
227 * so that the fchmod() mask above stays intact. */
d025f1e4 228 if (acl_get_permset(entry, &permset) < 0 ||
23ad4dd8
JAS
229 acl_add_perm(permset, ACL_READ) < 0 ||
230 calc_acl_mask_if_needed(&acl) < 0) {
56f64d95 231 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
232 goto finish;
233 }
234
235 if (acl_set_fd(f->fd, acl) < 0)
56f64d95 236 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
237
238finish:
239 acl_free(acl);
240#endif
241}
242
243static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 244 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
245 int r;
246 JournalFile *f;
247 sd_id128_t machine;
248
249 assert(s);
250
251 /* We split up user logs only on /var, not on /run. If the
252 * runtime file is open, we write to it exclusively, in order
253 * to guarantee proper order as soon as we flush /run to
254 * /var and close the runtime file. */
255
256 if (s->runtime_journal)
257 return s->runtime_journal;
258
f7dc3ab9 259 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
260 return s->system_journal;
261
262 r = sd_id128_get_machine(&machine);
263 if (r < 0)
264 return s->system_journal;
265
43cf8388 266 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
d025f1e4
ZJS
267 if (f)
268 return f;
269
de0671ee
ZJS
270 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
271 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
272 return s->system_journal;
273
43cf8388 274 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 275 /* Too many open? Then let's close one */
43cf8388 276 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4
ZJS
277 assert(f);
278 journal_file_close(f);
279 }
280
cbd67177 281 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
d025f1e4
ZJS
282 if (r < 0)
283 return s->system_journal;
284
285 server_fix_perms(s, f, uid);
286
43cf8388 287 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
d025f1e4
ZJS
288 if (r < 0) {
289 journal_file_close(f);
290 return s->system_journal;
291 }
292
293 return f;
294}
295
ea69bd41
LP
296static int do_rotate(
297 Server *s,
298 JournalFile **f,
299 const char* name,
300 bool seal,
301 uint32_t uid) {
302
fc55baee
ZJS
303 int r;
304 assert(s);
305
306 if (!*f)
307 return -EINVAL;
308
309 r = journal_file_rotate(f, s->compress, seal);
310 if (r < 0)
311 if (*f)
ea69bd41 312 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 313 else
ea69bd41 314 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee
ZJS
315 else
316 server_fix_perms(s, *f, uid);
2678031a 317
fc55baee
ZJS
318 return r;
319}
320
d025f1e4
ZJS
321void server_rotate(Server *s) {
322 JournalFile *f;
323 void *k;
324 Iterator i;
325 int r;
326
327 log_debug("Rotating...");
328
fc55baee
ZJS
329 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
330 do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 331
43cf8388 332 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
fc55baee
ZJS
333 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
334 if (r >= 0)
43cf8388 335 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
336 else if (!f)
337 /* Old file has been closed and deallocated */
43cf8388 338 ordered_hashmap_remove(s->user_journals, k);
d025f1e4
ZJS
339 }
340}
341
26687bf8
OS
342void server_sync(Server *s) {
343 JournalFile *f;
344 void *k;
345 Iterator i;
346 int r;
347
26687bf8
OS
348 if (s->system_journal) {
349 r = journal_file_set_offline(s->system_journal);
350 if (r < 0)
da927ba9 351 log_error_errno(r, "Failed to sync system journal: %m");
26687bf8
OS
352 }
353
43cf8388 354 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
26687bf8
OS
355 r = journal_file_set_offline(f);
356 if (r < 0)
da927ba9 357 log_error_errno(r, "Failed to sync user journal: %m");
26687bf8
OS
358 }
359
f9a810be
LP
360 if (s->sync_event_source) {
361 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
362 if (r < 0)
da927ba9 363 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 364 }
26687bf8
OS
365
366 s->sync_scheduled = false;
367}
368
ea69bd41
LP
369static void do_vacuum(
370 Server *s,
371 const char *id,
372 JournalFile *f,
373 const char* path,
374 JournalMetrics *metrics) {
375
376 const char *p;
63c8666b
ZJS
377 int r;
378
379 if (!f)
380 return;
381
63c372cb 382 p = strjoina(path, id);
dbd2a83f 383 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
63c8666b 384 if (r < 0 && r != -ENOENT)
da927ba9 385 log_error_errno(r, "Failed to vacuum %s: %m", p);
63c8666b
ZJS
386}
387
d025f1e4 388void server_vacuum(Server *s) {
d025f1e4
ZJS
389 char ids[33];
390 sd_id128_t machine;
391 int r;
392
393 log_debug("Vacuuming...");
394
395 s->oldest_file_usec = 0;
396
397 r = sd_id128_get_machine(&machine);
398 if (r < 0) {
da927ba9 399 log_error_errno(r, "Failed to get machine ID: %m");
d025f1e4
ZJS
400 return;
401 }
d025f1e4
ZJS
402 sd_id128_to_string(machine, ids);
403
63c8666b
ZJS
404 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
405 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
d025f1e4
ZJS
406
407 s->cached_available_space_timestamp = 0;
408}
409
0c24bb23
LP
410static void server_cache_machine_id(Server *s) {
411 sd_id128_t id;
412 int r;
413
414 assert(s);
415
416 r = sd_id128_get_machine(&id);
417 if (r < 0)
418 return;
419
420 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
421}
422
423static void server_cache_boot_id(Server *s) {
424 sd_id128_t id;
425 int r;
426
427 assert(s);
428
429 r = sd_id128_get_boot(&id);
430 if (r < 0)
431 return;
432
433 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
434}
435
436static void server_cache_hostname(Server *s) {
437 _cleanup_free_ char *t = NULL;
438 char *x;
439
440 assert(s);
441
442 t = gethostname_malloc();
443 if (!t)
444 return;
445
446 x = strappend("_HOSTNAME=", t);
447 if (!x)
448 return;
449
450 free(s->hostname_field);
451 s->hostname_field = x;
452}
453
8531ae70 454static bool shall_try_append_again(JournalFile *f, int r) {
d025f1e4
ZJS
455
456 /* -E2BIG Hit configured limit
457 -EFBIG Hit fs limit
458 -EDQUOT Quota limit hit
459 -ENOSPC Disk full
fa6ac760 460 -EIO I/O error of some kind (mmap)
d025f1e4
ZJS
461 -EHOSTDOWN Other machine
462 -EBUSY Unclean shutdown
463 -EPROTONOSUPPORT Unsupported feature
464 -EBADMSG Corrupted
465 -ENODATA Truncated
2678031a
LP
466 -ESHUTDOWN Already archived
467 -EIDRM Journal file has been deleted */
d025f1e4
ZJS
468
469 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
470 log_debug("%s: Allocation limit reached, rotating.", f->path);
471 else if (r == -EHOSTDOWN)
472 log_info("%s: Journal file from other machine, rotating.", f->path);
473 else if (r == -EBUSY)
474 log_info("%s: Unclean shutdown, rotating.", f->path);
475 else if (r == -EPROTONOSUPPORT)
476 log_info("%s: Unsupported feature, rotating.", f->path);
477 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
478 log_warning("%s: Journal file corrupted, rotating.", f->path);
fa6ac760
LP
479 else if (r == -EIO)
480 log_warning("%s: IO error, rotating.", f->path);
2678031a
LP
481 else if (r == -EIDRM)
482 log_warning("%s: Journal file has been deleted, rotating.", f->path);
d025f1e4
ZJS
483 else
484 return false;
485
486 return true;
487}
488
d07f7b9e 489static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
490 JournalFile *f;
491 bool vacuumed = false;
492 int r;
493
494 assert(s);
495 assert(iovec);
496 assert(n > 0);
497
498 f = find_journal(s, uid);
499 if (!f)
500 return;
501
502 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
503 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
504 server_rotate(s);
505 server_vacuum(s);
506 vacuumed = true;
507
508 f = find_journal(s, uid);
509 if (!f)
510 return;
511 }
512
513 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 514 if (r >= 0) {
d07f7b9e 515 server_schedule_sync(s, priority);
d025f1e4 516 return;
26687bf8 517 }
d025f1e4
ZJS
518
519 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 520 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
521 return;
522 }
523
524 server_rotate(s);
525 server_vacuum(s);
526
527 f = find_journal(s, uid);
528 if (!f)
529 return;
530
531 log_debug("Retrying write.");
532 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
533 if (r < 0)
534 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
535 else
d07f7b9e 536 server_schedule_sync(s, priority);
d025f1e4
ZJS
537}
538
539static void dispatch_message_real(
540 Server *s,
541 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
542 const struct ucred *ucred,
543 const struct timeval *tv,
d025f1e4 544 const char *label, size_t label_len,
968f3196 545 const char *unit_id,
d07f7b9e 546 int priority,
968f3196 547 pid_t object_pid) {
d025f1e4 548
968f3196 549 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
550 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
551 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
552 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 553 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
554 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
555 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
556 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
557 uid_t object_uid;
558 gid_t object_gid;
968f3196 559 char *x;
d025f1e4 560 int r;
ae018d9b 561 char *t, *c;
82499507
LP
562 uid_t realuid = 0, owner = 0, journal_uid;
563 bool owner_valid = false;
ae018d9b 564#ifdef HAVE_AUDIT
968f3196
ZJS
565 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
567 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
568 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
569
570 uint32_t audit;
571 uid_t loginuid;
572#endif
d025f1e4
ZJS
573
574 assert(s);
575 assert(iovec);
576 assert(n > 0);
968f3196 577 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
578
579 if (ucred) {
d025f1e4
ZJS
580 realuid = ucred->uid;
581
de0671ee 582 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 583 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 584
de0671ee 585 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 586 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 587
de0671ee 588 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 589 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
590
591 r = get_process_comm(ucred->pid, &t);
592 if (r >= 0) {
63c372cb 593 x = strjoina("_COMM=", t);
d025f1e4 594 free(t);
968f3196 595 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
596 }
597
598 r = get_process_exe(ucred->pid, &t);
599 if (r >= 0) {
63c372cb 600 x = strjoina("_EXE=", t);
d025f1e4 601 free(t);
968f3196 602 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
603 }
604
9bdbc2e2 605 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 606 if (r >= 0) {
63c372cb 607 x = strjoina("_CMDLINE=", t);
d025f1e4 608 free(t);
3a832116
SL
609 IOVEC_SET_STRING(iovec[n++], x);
610 }
611
612 r = get_process_capeff(ucred->pid, &t);
613 if (r >= 0) {
63c372cb 614 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 615 free(t);
968f3196 616 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
617 }
618
0a20e3c1 619#ifdef HAVE_AUDIT
d025f1e4 620 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 621 if (r >= 0) {
de0671ee 622 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
623 IOVEC_SET_STRING(iovec[n++], audit_session);
624 }
d025f1e4
ZJS
625
626 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 627 if (r >= 0) {
de0671ee 628 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 629 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 630 }
ae018d9b 631#endif
d025f1e4 632
e9174f29 633 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 634 if (r >= 0) {
968f3196
ZJS
635 char *session = NULL;
636
63c372cb 637 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 638 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 639
ae018d9b
LP
640 r = cg_path_get_session(c, &t);
641 if (r >= 0) {
63c372cb 642 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 643 free(t);
d025f1e4 644 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
645 }
646
647 if (cg_path_get_owner_uid(c, &owner) >= 0) {
648 owner_valid = true;
d025f1e4 649
de0671ee 650 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 651 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 652 }
d025f1e4 653
ae018d9b 654 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 655 x = strjoina("_SYSTEMD_UNIT=", t);
ae018d9b 656 free(t);
19cace37
LP
657 IOVEC_SET_STRING(iovec[n++], x);
658 } else if (unit_id && !session) {
63c372cb 659 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
660 IOVEC_SET_STRING(iovec[n++], x);
661 }
662
663 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 664 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 665 free(t);
968f3196 666 IOVEC_SET_STRING(iovec[n++], x);
19cace37 667 } else if (unit_id && session) {
63c372cb 668 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
669 IOVEC_SET_STRING(iovec[n++], x);
670 }
ae018d9b 671
0a244b8e 672 if (cg_path_get_slice(c, &t) >= 0) {
63c372cb 673 x = strjoina("_SYSTEMD_SLICE=", t);
0a244b8e
LP
674 free(t);
675 IOVEC_SET_STRING(iovec[n++], x);
676 }
677
ae018d9b 678 free(c);
2d43b190 679 } else if (unit_id) {
63c372cb 680 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 681 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 682 }
d025f1e4 683
d025f1e4 684#ifdef HAVE_SELINUX
6baa7db0 685 if (mac_selinux_use()) {
d682b3a7 686 if (label) {
f8294e41 687 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 688
d682b3a7
LP
689 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
690 IOVEC_SET_STRING(iovec[n++], x);
691 } else {
692 security_context_t con;
d025f1e4 693
d682b3a7 694 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 695 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 696
d682b3a7
LP
697 freecon(con);
698 IOVEC_SET_STRING(iovec[n++], x);
699 }
d025f1e4
ZJS
700 }
701 }
702#endif
703 }
968f3196
ZJS
704 assert(n <= m);
705
706 if (object_pid) {
707 r = get_process_uid(object_pid, &object_uid);
708 if (r >= 0) {
de0671ee 709 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
710 IOVEC_SET_STRING(iovec[n++], o_uid);
711 }
712
713 r = get_process_gid(object_pid, &object_gid);
714 if (r >= 0) {
de0671ee 715 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
716 IOVEC_SET_STRING(iovec[n++], o_gid);
717 }
718
719 r = get_process_comm(object_pid, &t);
720 if (r >= 0) {
63c372cb 721 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
722 free(t);
723 IOVEC_SET_STRING(iovec[n++], x);
724 }
725
726 r = get_process_exe(object_pid, &t);
727 if (r >= 0) {
63c372cb 728 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
729 free(t);
730 IOVEC_SET_STRING(iovec[n++], x);
731 }
732
733 r = get_process_cmdline(object_pid, 0, false, &t);
734 if (r >= 0) {
63c372cb 735 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
736 free(t);
737 IOVEC_SET_STRING(iovec[n++], x);
738 }
739
740#ifdef HAVE_AUDIT
741 r = audit_session_from_pid(object_pid, &audit);
742 if (r >= 0) {
de0671ee 743 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
744 IOVEC_SET_STRING(iovec[n++], o_audit_session);
745 }
746
747 r = audit_loginuid_from_pid(object_pid, &loginuid);
748 if (r >= 0) {
de0671ee 749 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
750 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
751 }
752#endif
753
e9174f29 754 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 755 if (r >= 0) {
63c372cb 756 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
757 IOVEC_SET_STRING(iovec[n++], x);
758
759 r = cg_path_get_session(c, &t);
760 if (r >= 0) {
63c372cb 761 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
762 free(t);
763 IOVEC_SET_STRING(iovec[n++], x);
764 }
765
766 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 767 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
768 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
769 }
770
771 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 772 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 773 free(t);
19cace37
LP
774 IOVEC_SET_STRING(iovec[n++], x);
775 }
776
777 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 778 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 779 free(t);
968f3196 780 IOVEC_SET_STRING(iovec[n++], x);
19cace37 781 }
968f3196
ZJS
782
783 free(c);
784 }
785 }
786 assert(n <= m);
d025f1e4
ZJS
787
788 if (tv) {
ae018d9b 789 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 790 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
791 }
792
793 /* Note that strictly speaking storing the boot id here is
794 * redundant since the entry includes this in-line
795 * anyway. However, we need this indexed, too. */
0c24bb23
LP
796 if (!isempty(s->boot_id_field))
797 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 798
0c24bb23
LP
799 if (!isempty(s->machine_id_field))
800 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 801
0c24bb23
LP
802 if (!isempty(s->hostname_field))
803 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
804
805 assert(n <= m);
806
da499392 807 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 808 /* Split up strictly by any UID */
759c945a 809 journal_uid = realuid;
82499507 810 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
811 /* Split up by login UIDs. We do this only if the
812 * realuid is not root, in order not to accidentally
813 * leak privileged information to the user that is
814 * logged by a privileged process that is part of an
7517e174 815 * unprivileged session. */
8a0889df 816 journal_uid = owner;
da499392
KS
817 else
818 journal_uid = 0;
759c945a 819
d07f7b9e 820 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
821}
822
823void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
824 char mid[11 + 32 + 1];
825 char buffer[16 + LINE_MAX + 1];
826 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
827 int n = 0;
828 va_list ap;
b92bea5d 829 struct ucred ucred = {};
d025f1e4
ZJS
830
831 assert(s);
832 assert(format);
833
834 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
835 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
836
837 memcpy(buffer, "MESSAGE=", 8);
838 va_start(ap, format);
839 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
840 va_end(ap);
d025f1e4
ZJS
841 IOVEC_SET_STRING(iovec[n++], buffer);
842
843 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
e2cc6eca 844 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
845 IOVEC_SET_STRING(iovec[n++], mid);
846 }
847
d025f1e4
ZJS
848 ucred.pid = getpid();
849 ucred.uid = getuid();
850 ucred.gid = getgid();
851
d07f7b9e 852 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
d025f1e4
ZJS
853}
854
855void server_dispatch_message(
856 Server *s,
857 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
858 const struct ucred *ucred,
859 const struct timeval *tv,
d025f1e4
ZJS
860 const char *label, size_t label_len,
861 const char *unit_id,
968f3196
ZJS
862 int priority,
863 pid_t object_pid) {
d025f1e4 864
7027ff61 865 int rl, r;
7fd1b19b 866 _cleanup_free_ char *path = NULL;
db91ea32 867 char *c;
d025f1e4
ZJS
868
869 assert(s);
870 assert(iovec || n == 0);
871
872 if (n == 0)
873 return;
874
875 if (LOG_PRI(priority) > s->max_level_store)
876 return;
877
2f5df74a
HHPF
878 /* Stop early in case the information will not be stored
879 * in a journal. */
880 if (s->storage == STORAGE_NONE)
881 return;
882
d025f1e4
ZJS
883 if (!ucred)
884 goto finish;
885
e9174f29 886 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 887 if (r < 0)
d025f1e4
ZJS
888 goto finish;
889
890 /* example: /user/lennart/3/foobar
891 * /system/dbus.service/foobar
892 *
893 * So let's cut of everything past the third /, since that is
894 * where user directories start */
895
896 c = strchr(path, '/');
897 if (c) {
898 c = strchr(c+1, '/');
899 if (c) {
900 c = strchr(c+1, '/');
901 if (c)
902 *c = 0;
903 }
904 }
905
db91ea32 906 rl = journal_rate_limit_test(s->rate_limit, path,
670b110c 907 priority & LOG_PRIMASK, available_space(s, false));
d025f1e4 908
db91ea32 909 if (rl == 0)
d025f1e4 910 return;
d025f1e4
ZJS
911
912 /* Write a suppression message if we suppressed something */
913 if (rl > 1)
db91ea32
ZJS
914 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915 "Suppressed %u messages from %s", rl - 1, path);
d025f1e4
ZJS
916
917finish:
d07f7b9e 918 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
919}
920
921
caa2f4c0 922static int system_journal_open(Server *s, bool flush_requested) {
d025f1e4
ZJS
923 int r;
924 char *fn;
925 sd_id128_t machine;
926 char ids[33];
927
928 r = sd_id128_get_machine(&machine);
23bbb0de
MS
929 if (r < 0)
930 return log_error_errno(r, "Failed to get machine id: %m");
d025f1e4
ZJS
931
932 sd_id128_to_string(machine, ids);
933
934 if (!s->system_journal &&
935 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
caa2f4c0
ZJS
936 (flush_requested
937 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
d025f1e4
ZJS
938
939 /* If in auto mode: first try to create the machine
940 * path, but not the prefix.
941 *
942 * If in persistent mode: create /var/log/journal and
943 * the machine path */
944
945 if (s->storage == STORAGE_PERSISTENT)
946 (void) mkdir("/var/log/journal/", 0755);
947
63c372cb 948 fn = strjoina("/var/log/journal/", ids);
d025f1e4 949 (void) mkdir(fn, 0755);
d025f1e4 950
63c372cb 951 fn = strjoina(fn, "/system.journal");
d025f1e4 952 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
d025f1e4 953
670b110c 954 if (r >= 0)
d025f1e4 955 server_fix_perms(s, s->system_journal, 0);
433dd100
LN
956 else if (r < 0) {
957 if (r != -ENOENT && r != -EROFS)
da927ba9 958 log_warning_errno(r, "Failed to open system journal: %m");
e40ec7ae 959
433dd100
LN
960 r = 0;
961 }
d025f1e4
ZJS
962 }
963
964 if (!s->runtime_journal &&
965 (s->storage != STORAGE_NONE)) {
966
967 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
968 if (!fn)
969 return -ENOMEM;
970
971 if (s->system_journal) {
972
973 /* Try to open the runtime journal, but only
974 * if it already exists, so that we can flush
975 * it into the system journal */
976
977 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
978 free(fn);
979
980 if (r < 0) {
981 if (r != -ENOENT)
da927ba9 982 log_warning_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
983
984 r = 0;
985 }
986
987 } else {
988
989 /* OK, we really need the runtime journal, so create
990 * it if necessary. */
991
fc1d70af
LP
992 (void) mkdir("/run/log", 0755);
993 (void) mkdir("/run/log/journal", 0755);
994 (void) mkdir_parents(fn, 0750);
995
d025f1e4
ZJS
996 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
997 free(fn);
998
23bbb0de
MS
999 if (r < 0)
1000 return log_error_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
1001 }
1002
670b110c 1003 if (s->runtime_journal)
d025f1e4 1004 server_fix_perms(s, s->runtime_journal, 0);
d025f1e4
ZJS
1005 }
1006
670b110c
ZJS
1007 available_space(s, true);
1008
d025f1e4
ZJS
1009 return r;
1010}
1011
1012int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1013 sd_id128_t machine;
1014 sd_journal *j = NULL;
fbb63411
LP
1015 char ts[FORMAT_TIMESPAN_MAX];
1016 usec_t start;
1017 unsigned n = 0;
1018 int r;
d025f1e4
ZJS
1019
1020 assert(s);
1021
1022 if (s->storage != STORAGE_AUTO &&
1023 s->storage != STORAGE_PERSISTENT)
1024 return 0;
1025
1026 if (!s->runtime_journal)
1027 return 0;
1028
caa2f4c0 1029 system_journal_open(s, true);
d025f1e4
ZJS
1030
1031 if (!s->system_journal)
1032 return 0;
1033
1034 log_debug("Flushing to /var...");
1035
fbb63411
LP
1036 start = now(CLOCK_MONOTONIC);
1037
d025f1e4 1038 r = sd_id128_get_machine(&machine);
00a16861 1039 if (r < 0)
d025f1e4 1040 return r;
d025f1e4
ZJS
1041
1042 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1043 if (r < 0)
1044 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1045
93b73b06
LP
1046 sd_journal_set_data_threshold(j, 0);
1047
d025f1e4
ZJS
1048 SD_JOURNAL_FOREACH(j) {
1049 Object *o = NULL;
1050 JournalFile *f;
1051
1052 f = j->current_file;
1053 assert(f && f->current_offset > 0);
1054
fbb63411
LP
1055 n++;
1056
d025f1e4
ZJS
1057 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1058 if (r < 0) {
da927ba9 1059 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1060 goto finish;
1061 }
1062
1063 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1064 if (r >= 0)
1065 continue;
1066
1067 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1068 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1069 goto finish;
1070 }
1071
1072 server_rotate(s);
1073 server_vacuum(s);
1074
253f59df
LP
1075 if (!s->system_journal) {
1076 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1077 r = -EIO;
1078 goto finish;
1079 }
1080
d025f1e4
ZJS
1081 log_debug("Retrying write.");
1082 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1083 if (r < 0) {
da927ba9 1084 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1085 goto finish;
1086 }
1087 }
1088
1089finish:
1090 journal_file_post_change(s->system_journal);
1091
1092 journal_file_close(s->runtime_journal);
1093 s->runtime_journal = NULL;
1094
1095 if (r >= 0)
c6878637 1096 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1097
763c7aa2 1098 sd_journal_close(j);
d025f1e4 1099
fbb63411
LP
1100 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1101
d025f1e4
ZJS
1102 return r;
1103}
1104
8531ae70 1105int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be
LP
1106 Server *s = userdata;
1107
d025f1e4 1108 assert(s);
875c2e22 1109 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1110
1111 if (revents != EPOLLIN) {
1112 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1113 return -EIO;
1114 }
1115
1116 for (;;) {
1117 struct ucred *ucred = NULL;
1118 struct timeval *tv = NULL;
1119 struct cmsghdr *cmsg;
1120 char *label = NULL;
1121 size_t label_len = 0;
1122 struct iovec iovec;
1123
1124 union {
1125 struct cmsghdr cmsghdr;
1126
bdd13f6b
ZJS
1127 /* We use NAME_MAX space for the SELinux label
1128 * here. The kernel currently enforces no
1129 * limit, but according to suggestions from
1130 * the SELinux people this will change and it
1131 * will probably be identical to NAME_MAX. For
1132 * now we use that, but this should be updated
7517e174 1133 * one day when the final limit is known. */
f9a810be
LP
1134 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1135 CMSG_SPACE(sizeof(struct timeval)) +
1136 CMSG_SPACE(sizeof(int)) + /* fd */
1137 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1138 } control = {};
875c2e22 1139 union sockaddr_union sa = {};
f9a810be
LP
1140 struct msghdr msghdr = {
1141 .msg_iov = &iovec,
1142 .msg_iovlen = 1,
1143 .msg_control = &control,
1144 .msg_controllen = sizeof(control),
875c2e22
LP
1145 .msg_name = &sa,
1146 .msg_namelen = sizeof(sa),
f9a810be 1147 };
d025f1e4 1148
d025f1e4 1149 ssize_t n;
f9a810be
LP
1150 int *fds = NULL;
1151 unsigned n_fds = 0;
875c2e22
LP
1152 int v = 0;
1153 size_t m;
d025f1e4 1154
875c2e22
LP
1155 /* Try to get the right size, if we can. (Not all
1156 * sockets support SIOCINQ, hence we just try, but
1157 * don't rely on it. */
1158 (void) ioctl(fd, SIOCINQ, &v);
1159
7517e174 1160 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
875c2e22
LP
1161 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1162 (size_t) LINE_MAX,
1163 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1164
875c2e22 1165 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
f9a810be 1166 return log_oom();
d025f1e4 1167
f9a810be 1168 iovec.iov_base = s->buffer;
875c2e22 1169 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1170
f9a810be
LP
1171 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1172 if (n < 0) {
d025f1e4 1173 if (errno == EINTR || errno == EAGAIN)
f9a810be 1174 return 0;
d025f1e4 1175
56f64d95 1176 log_error_errno(errno, "recvmsg() failed: %m");
d025f1e4
ZJS
1177 return -errno;
1178 }
1179
2a1288ff 1180 CMSG_FOREACH(cmsg, &msghdr) {
f9a810be
LP
1181
1182 if (cmsg->cmsg_level == SOL_SOCKET &&
1183 cmsg->cmsg_type == SCM_CREDENTIALS &&
1184 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1185 ucred = (struct ucred*) CMSG_DATA(cmsg);
1186 else if (cmsg->cmsg_level == SOL_SOCKET &&
1187 cmsg->cmsg_type == SCM_SECURITY) {
1188 label = (char*) CMSG_DATA(cmsg);
1189 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1190 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1191 cmsg->cmsg_type == SO_TIMESTAMP &&
1192 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1193 tv = (struct timeval*) CMSG_DATA(cmsg);
1194 else if (cmsg->cmsg_level == SOL_SOCKET &&
1195 cmsg->cmsg_type == SCM_RIGHTS) {
1196 fds = (int*) CMSG_DATA(cmsg);
1197 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1198 }
d025f1e4
ZJS
1199 }
1200
875c2e22
LP
1201 /* And a trailing NUL, just in case */
1202 s->buffer[n] = 0;
1203
f9a810be 1204 if (fd == s->syslog_fd) {
875c2e22 1205 if (n > 0 && n_fds == 0)
f9a810be 1206 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
875c2e22 1207 else if (n_fds > 0)
f9a810be 1208 log_warning("Got file descriptors via syslog socket. Ignoring.");
d025f1e4 1209
875c2e22 1210 } else if (fd == s->native_fd) {
f9a810be
LP
1211 if (n > 0 && n_fds == 0)
1212 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1213 else if (n == 0 && n_fds == 1)
1214 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1215 else if (n_fds > 0)
1216 log_warning("Got too many file descriptors via native socket. Ignoring.");
875c2e22
LP
1217
1218 } else {
1219 assert(fd == s->audit_fd);
1220
1221 if (n > 0 && n_fds == 0)
0b97208d 1222 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
875c2e22
LP
1223 else if (n_fds > 0)
1224 log_warning("Got file descriptors via audit socket. Ignoring.");
d025f1e4
ZJS
1225 }
1226
f9a810be
LP
1227 close_many(fds, n_fds);
1228 }
f9a810be 1229}
d025f1e4 1230
f9a810be
LP
1231static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1232 Server *s = userdata;
d025f1e4 1233
f9a810be 1234 assert(s);
d025f1e4 1235
f9a810be 1236 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
d025f1e4 1237
f9a810be
LP
1238 server_flush_to_var(s);
1239 server_sync(s);
3bfd4e0c 1240 server_vacuum(s);
d025f1e4 1241
74055aa7
LP
1242 touch("/run/systemd/journal/flushed");
1243
f9a810be
LP
1244 return 0;
1245}
d025f1e4 1246
f9a810be
LP
1247static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1248 Server *s = userdata;
d025f1e4 1249
f9a810be 1250 assert(s);
d025f1e4 1251
f9a810be
LP
1252 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1253 server_rotate(s);
1254 server_vacuum(s);
d025f1e4 1255
f9a810be
LP
1256 return 0;
1257}
d025f1e4 1258
f9a810be
LP
1259static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1260 Server *s = userdata;
d025f1e4 1261
f9a810be 1262 assert(s);
d025f1e4 1263
4daf54a8 1264 log_received_signal(LOG_INFO, si);
d025f1e4 1265
6203e07a 1266 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1267 return 0;
1268}
1269
f9a810be 1270static int setup_signals(Server *s) {
d025f1e4 1271 sigset_t mask;
f9a810be 1272 int r;
d025f1e4
ZJS
1273
1274 assert(s);
1275
1276 assert_se(sigemptyset(&mask) == 0);
1277 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1278 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1279
151b9b96 1280 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1281 if (r < 0)
1282 return r;
1283
151b9b96 1284 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1285 if (r < 0)
1286 return r;
d025f1e4 1287
151b9b96 1288 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1289 if (r < 0)
1290 return r;
d025f1e4 1291
151b9b96 1292 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1293 if (r < 0)
1294 return r;
d025f1e4
ZJS
1295
1296 return 0;
1297}
1298
1299static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1300 _cleanup_free_ char *line = NULL;
a2a5291b 1301 const char *w, *state;
d025f1e4 1302 size_t l;
74df0fca 1303 int r;
d025f1e4 1304
74df0fca 1305 r = proc_cmdline(&line);
b5884878 1306 if (r < 0) {
da927ba9 1307 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1308 return 0;
b5884878 1309 }
d025f1e4
ZJS
1310
1311 FOREACH_WORD_QUOTED(w, l, line, state) {
7fd1b19b 1312 _cleanup_free_ char *word;
d025f1e4
ZJS
1313
1314 word = strndup(w, l);
db91ea32
ZJS
1315 if (!word)
1316 return -ENOMEM;
d025f1e4
ZJS
1317
1318 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1319 r = parse_boolean(word + 35);
1320 if (r < 0)
1321 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1322 else
1323 s->forward_to_syslog = r;
1324 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1325 r = parse_boolean(word + 33);
1326 if (r < 0)
1327 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1328 else
1329 s->forward_to_kmsg = r;
1330 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1331 r = parse_boolean(word + 36);
1332 if (r < 0)
1333 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1334 else
1335 s->forward_to_console = r;
40b71e89
ST
1336 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1337 r = parse_boolean(word + 33);
1338 if (r < 0)
1339 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1340 else
1341 s->forward_to_wall = r;
d025f1e4
ZJS
1342 } else if (startswith(word, "systemd.journald"))
1343 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4 1344 }
b2fadec6 1345 /* do not warn about state here, since probably systemd already did */
d025f1e4 1346
db91ea32 1347 return 0;
d025f1e4
ZJS
1348}
1349
1350static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1351 assert(s);
1352
a9edaeff
JT
1353 return config_parse_many("/etc/systemd/journald.conf",
1354 CONF_DIRS_NULSTR("systemd/journald.conf"),
1355 "Journal\0",
1356 config_item_perf_lookup, journald_gperf_lookup,
1357 false, s);
d025f1e4
ZJS
1358}
1359
f9a810be
LP
1360static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1361 Server *s = userdata;
26687bf8
OS
1362
1363 assert(s);
1364
f9a810be 1365 server_sync(s);
26687bf8
OS
1366 return 0;
1367}
1368
d07f7b9e 1369int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1370 int r;
1371
26687bf8
OS
1372 assert(s);
1373
d07f7b9e
LP
1374 if (priority <= LOG_CRIT) {
1375 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1376 server_sync(s);
1377 return 0;
1378 }
1379
26687bf8
OS
1380 if (s->sync_scheduled)
1381 return 0;
1382
f9a810be
LP
1383 if (s->sync_interval_usec > 0) {
1384 usec_t when;
ca267016 1385
6a0f1f6d 1386 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1387 if (r < 0)
1388 return r;
26687bf8 1389
f9a810be
LP
1390 when += s->sync_interval_usec;
1391
1392 if (!s->sync_event_source) {
6a0f1f6d
LP
1393 r = sd_event_add_time(
1394 s->event,
1395 &s->sync_event_source,
1396 CLOCK_MONOTONIC,
1397 when, 0,
1398 server_dispatch_sync, s);
f9a810be
LP
1399 if (r < 0)
1400 return r;
1401
1402 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1403 } else {
1404 r = sd_event_source_set_time(s->sync_event_source, when);
1405 if (r < 0)
1406 return r;
1407
1408 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1409 }
26687bf8 1410 if (r < 0)
f9a810be 1411 return r;
26687bf8 1412
f9a810be
LP
1413 s->sync_scheduled = true;
1414 }
26687bf8
OS
1415
1416 return 0;
1417}
1418
0c24bb23
LP
1419static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1420 Server *s = userdata;
1421
1422 assert(s);
1423
1424 server_cache_hostname(s);
1425 return 0;
1426}
1427
1428static int server_open_hostname(Server *s) {
1429 int r;
1430
1431 assert(s);
1432
1433 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1434 if (s->hostname_fd < 0)
1435 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1436
151b9b96 1437 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1438 if (r < 0) {
28def94c
DR
1439 /* kernels prior to 3.2 don't support polling this file. Ignore
1440 * the failure. */
1441 if (r == -EPERM) {
1442 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1443 strerror(-r));
03e334a1 1444 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1445 return 0;
1446 }
1447
23bbb0de 1448 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1449 }
1450
1451 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1452 if (r < 0)
1453 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1454
1455 return 0;
1456}
1457
d025f1e4 1458int server_init(Server *s) {
13790add 1459 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4
ZJS
1460 int n, r, fd;
1461
1462 assert(s);
1463
1464 zero(*s);
875c2e22 1465 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
d025f1e4
ZJS
1466 s->compress = true;
1467 s->seal = true;
1468
26687bf8
OS
1469 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1470 s->sync_scheduled = false;
1471
d025f1e4
ZJS
1472 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1473 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1474
40b71e89 1475 s->forward_to_wall = true;
d025f1e4 1476
e150e820
MB
1477 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1478
d025f1e4
ZJS
1479 s->max_level_store = LOG_DEBUG;
1480 s->max_level_syslog = LOG_DEBUG;
1481 s->max_level_kmsg = LOG_NOTICE;
1482 s->max_level_console = LOG_INFO;
40b71e89 1483 s->max_level_wall = LOG_EMERG;
d025f1e4
ZJS
1484
1485 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1486 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1487
1488 server_parse_config_file(s);
1489 server_parse_proc_cmdline(s);
d288f79f 1490 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1491 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1492 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1493 s->rate_limit_interval = s->rate_limit_burst = 0;
1494 }
d025f1e4
ZJS
1495
1496 mkdir_p("/run/systemd/journal", 0755);
1497
43cf8388 1498 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1499 if (!s->user_journals)
1500 return log_oom();
1501
1502 s->mmap = mmap_cache_new();
1503 if (!s->mmap)
1504 return log_oom();
1505
f9a810be 1506 r = sd_event_default(&s->event);
23bbb0de
MS
1507 if (r < 0)
1508 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4 1509
f9a810be
LP
1510 sd_event_set_watchdog(s->event, true);
1511
d025f1e4 1512 n = sd_listen_fds(true);
23bbb0de
MS
1513 if (n < 0)
1514 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1515
1516 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1517
1518 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1519
1520 if (s->native_fd >= 0) {
1521 log_error("Too many native sockets passed.");
1522 return -EINVAL;
1523 }
1524
1525 s->native_fd = fd;
1526
1527 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1528
1529 if (s->stdout_fd >= 0) {
1530 log_error("Too many stdout sockets passed.");
1531 return -EINVAL;
1532 }
1533
1534 s->stdout_fd = fd;
1535
03ee5c38
LP
1536 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1537 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1538
1539 if (s->syslog_fd >= 0) {
1540 log_error("Too many /dev/log sockets passed.");
1541 return -EINVAL;
1542 }
1543
1544 s->syslog_fd = fd;
1545
875c2e22
LP
1546 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1547
1548 if (s->audit_fd >= 0) {
1549 log_error("Too many audit sockets passed.");
1550 return -EINVAL;
1551 }
1552
1553 s->audit_fd = fd;
1554
4ec3cd73 1555 } else {
4ec3cd73 1556
13790add
LP
1557 if (!fds) {
1558 fds = fdset_new();
1559 if (!fds)
1560 return log_oom();
1561 }
4ec3cd73 1562
13790add
LP
1563 r = fdset_put(fds, fd);
1564 if (r < 0)
1565 return log_oom();
4ec3cd73 1566 }
d025f1e4
ZJS
1567 }
1568
13790add 1569 r = server_open_stdout_socket(s, fds);
d025f1e4
ZJS
1570 if (r < 0)
1571 return r;
1572
13790add
LP
1573 if (fdset_size(fds) > 0) {
1574 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1575 fds = fdset_free(fds);
1576 }
1577
1578 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1579 if (r < 0)
1580 return r;
1581
13790add 1582 r = server_open_native_socket(s);
d025f1e4
ZJS
1583 if (r < 0)
1584 return r;
1585
1586 r = server_open_dev_kmsg(s);
1587 if (r < 0)
1588 return r;
1589
875c2e22
LP
1590 r = server_open_audit(s);
1591 if (r < 0)
1592 return r;
1593
d025f1e4
ZJS
1594 r = server_open_kernel_seqnum(s);
1595 if (r < 0)
1596 return r;
1597
0c24bb23
LP
1598 r = server_open_hostname(s);
1599 if (r < 0)
1600 return r;
1601
f9a810be 1602 r = setup_signals(s);
d025f1e4
ZJS
1603 if (r < 0)
1604 return r;
1605
1606 s->udev = udev_new();
1607 if (!s->udev)
1608 return -ENOMEM;
1609
f9a810be 1610 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1611 if (!s->rate_limit)
1612 return -ENOMEM;
1613
e9174f29
LP
1614 r = cg_get_root_path(&s->cgroup_root);
1615 if (r < 0)
1616 return r;
1617
0c24bb23
LP
1618 server_cache_hostname(s);
1619 server_cache_boot_id(s);
1620 server_cache_machine_id(s);
1621
caa2f4c0 1622 r = system_journal_open(s, false);
d025f1e4
ZJS
1623 if (r < 0)
1624 return r;
1625
1626 return 0;
1627}
1628
1629void server_maybe_append_tags(Server *s) {
1630#ifdef HAVE_GCRYPT
1631 JournalFile *f;
1632 Iterator i;
1633 usec_t n;
1634
1635 n = now(CLOCK_REALTIME);
1636
1637 if (s->system_journal)
1638 journal_file_maybe_append_tag(s->system_journal, n);
1639
43cf8388 1640 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1641 journal_file_maybe_append_tag(f, n);
1642#endif
1643}
1644
1645void server_done(Server *s) {
1646 JournalFile *f;
1647 assert(s);
1648
1649 while (s->stdout_streams)
1650 stdout_stream_free(s->stdout_streams);
1651
1652 if (s->system_journal)
1653 journal_file_close(s->system_journal);
1654
1655 if (s->runtime_journal)
1656 journal_file_close(s->runtime_journal);
1657
43cf8388 1658 while ((f = ordered_hashmap_steal_first(s->user_journals)))
d025f1e4
ZJS
1659 journal_file_close(f);
1660
43cf8388 1661 ordered_hashmap_free(s->user_journals);
d025f1e4 1662
f9a810be
LP
1663 sd_event_source_unref(s->syslog_event_source);
1664 sd_event_source_unref(s->native_event_source);
1665 sd_event_source_unref(s->stdout_event_source);
1666 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1667 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1668 sd_event_source_unref(s->sync_event_source);
1669 sd_event_source_unref(s->sigusr1_event_source);
1670 sd_event_source_unref(s->sigusr2_event_source);
1671 sd_event_source_unref(s->sigterm_event_source);
1672 sd_event_source_unref(s->sigint_event_source);
0c24bb23 1673 sd_event_source_unref(s->hostname_event_source);
f9a810be 1674 sd_event_unref(s->event);
d025f1e4 1675
03e334a1
LP
1676 safe_close(s->syslog_fd);
1677 safe_close(s->native_fd);
1678 safe_close(s->stdout_fd);
1679 safe_close(s->dev_kmsg_fd);
875c2e22 1680 safe_close(s->audit_fd);
03e334a1 1681 safe_close(s->hostname_fd);
0c24bb23 1682
d025f1e4
ZJS
1683 if (s->rate_limit)
1684 journal_rate_limit_free(s->rate_limit);
1685
1686 if (s->kernel_seqnum)
1687 munmap(s->kernel_seqnum, sizeof(uint64_t));
1688
1689 free(s->buffer);
1690 free(s->tty_path);
e9174f29 1691 free(s->cgroup_root);
99d0966e 1692 free(s->hostname_field);
d025f1e4
ZJS
1693
1694 if (s->mmap)
1695 mmap_cache_unref(s->mmap);
1696
1697 if (s->udev)
1698 udev_unref(s->udev);
1699}