]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-server.c
util: rework rm_rf() logic
[thirdparty/systemd.git] / src / journal / journald-server.c
CommitLineData
d025f1e4
ZJS
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/signalfd.h>
23#include <sys/ioctl.h>
24#include <linux/sockios.h>
25#include <sys/statvfs.h>
26#include <sys/mman.h>
27
28#include <libudev.h>
d025f1e4 29
74df0fca
LP
30#include "sd-journal.h"
31#include "sd-messages.h"
32#include "sd-daemon.h"
d025f1e4 33#include "mkdir.h"
c6878637 34#include "rm-rf.h"
d025f1e4
ZJS
35#include "hashmap.h"
36#include "journal-file.h"
37#include "socket-util.h"
38#include "cgroup-util.h"
d025f1e4
ZJS
39#include "missing.h"
40#include "conf-parser.h"
74df0fca 41#include "selinux-util.h"
d025f1e4
ZJS
42#include "journal-internal.h"
43#include "journal-vacuum.h"
44#include "journal-authenticate.h"
d025f1e4
ZJS
45#include "journald-rate-limit.h"
46#include "journald-kmsg.h"
47#include "journald-syslog.h"
48#include "journald-stream.h"
d025f1e4 49#include "journald-native.h"
875c2e22 50#include "journald-audit.h"
74df0fca 51#include "journald-server.h"
d025f1e4 52#include "acl-util.h"
d025f1e4
ZJS
53
54#ifdef HAVE_SELINUX
55#include <selinux/selinux.h>
56#endif
57
58#define USER_JOURNALS_MAX 1024
59
26687bf8 60#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
7f1ad696
LP
61#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
62#define DEFAULT_RATE_LIMIT_BURST 1000
e150e820 63#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
d025f1e4
ZJS
64
65#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
66
2c5859af 67static const char* const storage_table[_STORAGE_MAX] = {
d025f1e4
ZJS
68 [STORAGE_AUTO] = "auto",
69 [STORAGE_VOLATILE] = "volatile",
70 [STORAGE_PERSISTENT] = "persistent",
71 [STORAGE_NONE] = "none"
72};
73
74DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
75DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
76
2c5859af
DM
77static const char* const split_mode_table[_SPLIT_MAX] = {
78 [SPLIT_LOGIN] = "login",
d025f1e4 79 [SPLIT_UID] = "uid",
2c5859af 80 [SPLIT_NONE] = "none",
d025f1e4
ZJS
81};
82
83DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
84DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
85
670b110c 86static uint64_t available_space(Server *s, bool verbose) {
db91ea32 87 char ids[33];
7fd1b19b 88 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
89 sd_id128_t machine;
90 struct statvfs ss;
670b110c 91 uint64_t sum = 0, ss_avail = 0, avail = 0;
d025f1e4 92 int r;
7fd1b19b 93 _cleanup_closedir_ DIR *d = NULL;
d025f1e4 94 usec_t ts;
670b110c 95 const char *f;
d025f1e4
ZJS
96 JournalMetrics *m;
97
98 ts = now(CLOCK_MONOTONIC);
99
670b110c
ZJS
100 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
101 && !verbose)
d025f1e4
ZJS
102 return s->cached_available_space;
103
104 r = sd_id128_get_machine(&machine);
105 if (r < 0)
106 return 0;
107
108 if (s->system_journal) {
109 f = "/var/log/journal/";
110 m = &s->system_metrics;
111 } else {
112 f = "/run/log/journal/";
113 m = &s->runtime_metrics;
114 }
115
116 assert(m);
117
118 p = strappend(f, sd_id128_to_string(machine, ids));
119 if (!p)
120 return 0;
121
122 d = opendir(p);
d025f1e4
ZJS
123 if (!d)
124 return 0;
125
126 if (fstatvfs(dirfd(d), &ss) < 0)
db91ea32 127 return 0;
d025f1e4
ZJS
128
129 for (;;) {
130 struct stat st;
131 struct dirent *de;
d025f1e4 132
0371ca0d
FW
133 errno = 0;
134 de = readdir(d);
135 if (!de && errno != 0)
136 return 0;
d025f1e4
ZJS
137
138 if (!de)
139 break;
140
141 if (!endswith(de->d_name, ".journal") &&
142 !endswith(de->d_name, ".journal~"))
143 continue;
144
145 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
146 continue;
147
148 if (!S_ISREG(st.st_mode))
149 continue;
150
151 sum += (uint64_t) st.st_blocks * 512UL;
152 }
153
d025f1e4
ZJS
154 ss_avail = ss.f_bsize * ss.f_bavail;
155
348ced90
ZJS
156 /* If we reached a high mark, we will always allow this much
157 * again, unless usage goes above max_use. This watermark
158 * value is cached so that we don't give up space on pressure,
159 * but hover below the maximum usage. */
160
161 if (m->use < sum)
162 m->use = sum;
163
164 avail = LESS_BY(ss_avail, m->keep_free);
165
166 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
d025f1e4
ZJS
167 s->cached_available_space_timestamp = ts;
168
670b110c
ZJS
169 if (verbose) {
170 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
171 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
172
173 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
348ced90
ZJS
174 "%s journal is using %s (max allowed %s, "
175 "trying to leave %s free of %s available → current limit %s).",
670b110c
ZJS
176 s->system_journal ? "Permanent" : "Runtime",
177 format_bytes(fb1, sizeof(fb1), sum),
178 format_bytes(fb2, sizeof(fb2), m->max_use),
179 format_bytes(fb3, sizeof(fb3), m->keep_free),
180 format_bytes(fb4, sizeof(fb4), ss_avail),
348ced90 181 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
670b110c
ZJS
182 }
183
184 return s->cached_available_space;
d025f1e4
ZJS
185}
186
d025f1e4
ZJS
187void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
188 int r;
189#ifdef HAVE_ACL
190 acl_t acl;
191 acl_entry_t entry;
192 acl_permset_t permset;
193#endif
194
195 assert(f);
196
4608af43 197 r = fchmod(f->fd, 0640);
d025f1e4 198 if (r < 0)
da927ba9 199 log_warning_errno(r, "Failed to fix access mode on %s, ignoring: %m", f->path);
d025f1e4
ZJS
200
201#ifdef HAVE_ACL
34c10968 202 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
203 return;
204
205 acl = acl_get_fd(f->fd);
206 if (!acl) {
56f64d95 207 log_warning_errno(errno, "Failed to read ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
208 return;
209 }
210
211 r = acl_find_uid(acl, uid, &entry);
212 if (r <= 0) {
213
214 if (acl_create_entry(&acl, &entry) < 0 ||
215 acl_set_tag_type(entry, ACL_USER) < 0 ||
216 acl_set_qualifier(entry, &uid) < 0) {
56f64d95 217 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
218 goto finish;
219 }
220 }
221
23ad4dd8
JAS
222 /* We do not recalculate the mask unconditionally here,
223 * so that the fchmod() mask above stays intact. */
d025f1e4 224 if (acl_get_permset(entry, &permset) < 0 ||
23ad4dd8
JAS
225 acl_add_perm(permset, ACL_READ) < 0 ||
226 calc_acl_mask_if_needed(&acl) < 0) {
56f64d95 227 log_warning_errno(errno, "Failed to patch ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
228 goto finish;
229 }
230
231 if (acl_set_fd(f->fd, acl) < 0)
56f64d95 232 log_warning_errno(errno, "Failed to set ACL on %s, ignoring: %m", f->path);
d025f1e4
ZJS
233
234finish:
235 acl_free(acl);
236#endif
237}
238
239static JournalFile* find_journal(Server *s, uid_t uid) {
ed375beb 240 _cleanup_free_ char *p = NULL;
d025f1e4
ZJS
241 int r;
242 JournalFile *f;
243 sd_id128_t machine;
244
245 assert(s);
246
247 /* We split up user logs only on /var, not on /run. If the
248 * runtime file is open, we write to it exclusively, in order
249 * to guarantee proper order as soon as we flush /run to
250 * /var and close the runtime file. */
251
252 if (s->runtime_journal)
253 return s->runtime_journal;
254
f7dc3ab9 255 if (uid <= SYSTEM_UID_MAX)
d025f1e4
ZJS
256 return s->system_journal;
257
258 r = sd_id128_get_machine(&machine);
259 if (r < 0)
260 return s->system_journal;
261
43cf8388 262 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
d025f1e4
ZJS
263 if (f)
264 return f;
265
de0671ee
ZJS
266 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
267 SD_ID128_FORMAT_VAL(machine), uid) < 0)
d025f1e4
ZJS
268 return s->system_journal;
269
43cf8388 270 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
d025f1e4 271 /* Too many open? Then let's close one */
43cf8388 272 f = ordered_hashmap_steal_first(s->user_journals);
d025f1e4
ZJS
273 assert(f);
274 journal_file_close(f);
275 }
276
cbd67177 277 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
d025f1e4
ZJS
278 if (r < 0)
279 return s->system_journal;
280
281 server_fix_perms(s, f, uid);
282
43cf8388 283 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
d025f1e4
ZJS
284 if (r < 0) {
285 journal_file_close(f);
286 return s->system_journal;
287 }
288
289 return f;
290}
291
ea69bd41
LP
292static int do_rotate(
293 Server *s,
294 JournalFile **f,
295 const char* name,
296 bool seal,
297 uint32_t uid) {
298
fc55baee
ZJS
299 int r;
300 assert(s);
301
302 if (!*f)
303 return -EINVAL;
304
305 r = journal_file_rotate(f, s->compress, seal);
306 if (r < 0)
307 if (*f)
ea69bd41 308 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
fc55baee 309 else
ea69bd41 310 log_error_errno(r, "Failed to create new %s journal: %m", name);
fc55baee
ZJS
311 else
312 server_fix_perms(s, *f, uid);
2678031a 313
fc55baee
ZJS
314 return r;
315}
316
d025f1e4
ZJS
317void server_rotate(Server *s) {
318 JournalFile *f;
319 void *k;
320 Iterator i;
321 int r;
322
323 log_debug("Rotating...");
324
fc55baee
ZJS
325 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
326 do_rotate(s, &s->system_journal, "system", s->seal, 0);
d025f1e4 327
43cf8388 328 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
fc55baee
ZJS
329 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
330 if (r >= 0)
43cf8388 331 ordered_hashmap_replace(s->user_journals, k, f);
fc55baee
ZJS
332 else if (!f)
333 /* Old file has been closed and deallocated */
43cf8388 334 ordered_hashmap_remove(s->user_journals, k);
d025f1e4
ZJS
335 }
336}
337
26687bf8
OS
338void server_sync(Server *s) {
339 JournalFile *f;
340 void *k;
341 Iterator i;
342 int r;
343
26687bf8
OS
344 if (s->system_journal) {
345 r = journal_file_set_offline(s->system_journal);
346 if (r < 0)
da927ba9 347 log_error_errno(r, "Failed to sync system journal: %m");
26687bf8
OS
348 }
349
43cf8388 350 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
26687bf8
OS
351 r = journal_file_set_offline(f);
352 if (r < 0)
da927ba9 353 log_error_errno(r, "Failed to sync user journal: %m");
26687bf8
OS
354 }
355
f9a810be
LP
356 if (s->sync_event_source) {
357 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
358 if (r < 0)
da927ba9 359 log_error_errno(r, "Failed to disable sync timer source: %m");
f9a810be 360 }
26687bf8
OS
361
362 s->sync_scheduled = false;
363}
364
ea69bd41
LP
365static void do_vacuum(
366 Server *s,
367 const char *id,
368 JournalFile *f,
369 const char* path,
370 JournalMetrics *metrics) {
371
372 const char *p;
63c8666b
ZJS
373 int r;
374
375 if (!f)
376 return;
377
63c372cb 378 p = strjoina(path, id);
dbd2a83f 379 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec, false);
63c8666b 380 if (r < 0 && r != -ENOENT)
da927ba9 381 log_error_errno(r, "Failed to vacuum %s: %m", p);
63c8666b
ZJS
382}
383
d025f1e4 384void server_vacuum(Server *s) {
d025f1e4
ZJS
385 char ids[33];
386 sd_id128_t machine;
387 int r;
388
389 log_debug("Vacuuming...");
390
391 s->oldest_file_usec = 0;
392
393 r = sd_id128_get_machine(&machine);
394 if (r < 0) {
da927ba9 395 log_error_errno(r, "Failed to get machine ID: %m");
d025f1e4
ZJS
396 return;
397 }
d025f1e4
ZJS
398 sd_id128_to_string(machine, ids);
399
63c8666b
ZJS
400 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
401 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
d025f1e4
ZJS
402
403 s->cached_available_space_timestamp = 0;
404}
405
0c24bb23
LP
406static void server_cache_machine_id(Server *s) {
407 sd_id128_t id;
408 int r;
409
410 assert(s);
411
412 r = sd_id128_get_machine(&id);
413 if (r < 0)
414 return;
415
416 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
417}
418
419static void server_cache_boot_id(Server *s) {
420 sd_id128_t id;
421 int r;
422
423 assert(s);
424
425 r = sd_id128_get_boot(&id);
426 if (r < 0)
427 return;
428
429 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
430}
431
432static void server_cache_hostname(Server *s) {
433 _cleanup_free_ char *t = NULL;
434 char *x;
435
436 assert(s);
437
438 t = gethostname_malloc();
439 if (!t)
440 return;
441
442 x = strappend("_HOSTNAME=", t);
443 if (!x)
444 return;
445
446 free(s->hostname_field);
447 s->hostname_field = x;
448}
449
8531ae70 450static bool shall_try_append_again(JournalFile *f, int r) {
d025f1e4
ZJS
451
452 /* -E2BIG Hit configured limit
453 -EFBIG Hit fs limit
454 -EDQUOT Quota limit hit
455 -ENOSPC Disk full
fa6ac760 456 -EIO I/O error of some kind (mmap)
d025f1e4
ZJS
457 -EHOSTDOWN Other machine
458 -EBUSY Unclean shutdown
459 -EPROTONOSUPPORT Unsupported feature
460 -EBADMSG Corrupted
461 -ENODATA Truncated
2678031a
LP
462 -ESHUTDOWN Already archived
463 -EIDRM Journal file has been deleted */
d025f1e4
ZJS
464
465 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
466 log_debug("%s: Allocation limit reached, rotating.", f->path);
467 else if (r == -EHOSTDOWN)
468 log_info("%s: Journal file from other machine, rotating.", f->path);
469 else if (r == -EBUSY)
470 log_info("%s: Unclean shutdown, rotating.", f->path);
471 else if (r == -EPROTONOSUPPORT)
472 log_info("%s: Unsupported feature, rotating.", f->path);
473 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
474 log_warning("%s: Journal file corrupted, rotating.", f->path);
fa6ac760
LP
475 else if (r == -EIO)
476 log_warning("%s: IO error, rotating.", f->path);
2678031a
LP
477 else if (r == -EIDRM)
478 log_warning("%s: Journal file has been deleted, rotating.", f->path);
d025f1e4
ZJS
479 else
480 return false;
481
482 return true;
483}
484
d07f7b9e 485static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
d025f1e4
ZJS
486 JournalFile *f;
487 bool vacuumed = false;
488 int r;
489
490 assert(s);
491 assert(iovec);
492 assert(n > 0);
493
494 f = find_journal(s, uid);
495 if (!f)
496 return;
497
498 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
499 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
500 server_rotate(s);
501 server_vacuum(s);
502 vacuumed = true;
503
504 f = find_journal(s, uid);
505 if (!f)
506 return;
507 }
508
509 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
26687bf8 510 if (r >= 0) {
d07f7b9e 511 server_schedule_sync(s, priority);
d025f1e4 512 return;
26687bf8 513 }
d025f1e4
ZJS
514
515 if (vacuumed || !shall_try_append_again(f, r)) {
8266e1c0 516 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
d025f1e4
ZJS
517 return;
518 }
519
520 server_rotate(s);
521 server_vacuum(s);
522
523 f = find_journal(s, uid);
524 if (!f)
525 return;
526
527 log_debug("Retrying write.");
528 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
8266e1c0
LP
529 if (r < 0)
530 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
531 else
d07f7b9e 532 server_schedule_sync(s, priority);
d025f1e4
ZJS
533}
534
535static void dispatch_message_real(
536 Server *s,
537 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
538 const struct ucred *ucred,
539 const struct timeval *tv,
d025f1e4 540 const char *label, size_t label_len,
968f3196 541 const char *unit_id,
d07f7b9e 542 int priority,
968f3196 543 pid_t object_pid) {
d025f1e4 544
968f3196 545 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
ae018d9b
LP
546 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
547 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
548 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
d3789917 549 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
968f3196
ZJS
550 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
551 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
552 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
553 uid_t object_uid;
554 gid_t object_gid;
968f3196 555 char *x;
d025f1e4 556 int r;
ae018d9b 557 char *t, *c;
82499507
LP
558 uid_t realuid = 0, owner = 0, journal_uid;
559 bool owner_valid = false;
ae018d9b 560#ifdef HAVE_AUDIT
968f3196
ZJS
561 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
562 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
563 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
ae018d9b
LP
565
566 uint32_t audit;
567 uid_t loginuid;
568#endif
d025f1e4
ZJS
569
570 assert(s);
571 assert(iovec);
572 assert(n > 0);
968f3196 573 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
d025f1e4
ZJS
574
575 if (ucred) {
d025f1e4
ZJS
576 realuid = ucred->uid;
577
de0671ee 578 sprintf(pid, "_PID="PID_FMT, ucred->pid);
c2457105 579 IOVEC_SET_STRING(iovec[n++], pid);
d025f1e4 580
de0671ee 581 sprintf(uid, "_UID="UID_FMT, ucred->uid);
c2457105 582 IOVEC_SET_STRING(iovec[n++], uid);
d025f1e4 583
de0671ee 584 sprintf(gid, "_GID="GID_FMT, ucred->gid);
c2457105 585 IOVEC_SET_STRING(iovec[n++], gid);
d025f1e4
ZJS
586
587 r = get_process_comm(ucred->pid, &t);
588 if (r >= 0) {
63c372cb 589 x = strjoina("_COMM=", t);
d025f1e4 590 free(t);
968f3196 591 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
592 }
593
594 r = get_process_exe(ucred->pid, &t);
595 if (r >= 0) {
63c372cb 596 x = strjoina("_EXE=", t);
d025f1e4 597 free(t);
968f3196 598 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
599 }
600
9bdbc2e2 601 r = get_process_cmdline(ucred->pid, 0, false, &t);
d025f1e4 602 if (r >= 0) {
63c372cb 603 x = strjoina("_CMDLINE=", t);
d025f1e4 604 free(t);
3a832116
SL
605 IOVEC_SET_STRING(iovec[n++], x);
606 }
607
608 r = get_process_capeff(ucred->pid, &t);
609 if (r >= 0) {
63c372cb 610 x = strjoina("_CAP_EFFECTIVE=", t);
3a832116 611 free(t);
968f3196 612 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4
ZJS
613 }
614
0a20e3c1 615#ifdef HAVE_AUDIT
d025f1e4 616 r = audit_session_from_pid(ucred->pid, &audit);
ae018d9b 617 if (r >= 0) {
de0671ee 618 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
ae018d9b
LP
619 IOVEC_SET_STRING(iovec[n++], audit_session);
620 }
d025f1e4
ZJS
621
622 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
7027ff61 623 if (r >= 0) {
de0671ee 624 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
ae018d9b 625 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
d025f1e4 626 }
ae018d9b 627#endif
d025f1e4 628
e9174f29 629 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
7027ff61 630 if (r >= 0) {
968f3196
ZJS
631 char *session = NULL;
632
63c372cb 633 x = strjoina("_SYSTEMD_CGROUP=", c);
968f3196 634 IOVEC_SET_STRING(iovec[n++], x);
d025f1e4 635
ae018d9b
LP
636 r = cg_path_get_session(c, &t);
637 if (r >= 0) {
63c372cb 638 session = strjoina("_SYSTEMD_SESSION=", t);
ae018d9b 639 free(t);
d025f1e4 640 IOVEC_SET_STRING(iovec[n++], session);
ae018d9b
LP
641 }
642
643 if (cg_path_get_owner_uid(c, &owner) >= 0) {
644 owner_valid = true;
d025f1e4 645
de0671ee 646 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
d025f1e4 647 IOVEC_SET_STRING(iovec[n++], owner_uid);
ae018d9b 648 }
d025f1e4 649
ae018d9b 650 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 651 x = strjoina("_SYSTEMD_UNIT=", t);
ae018d9b 652 free(t);
19cace37
LP
653 IOVEC_SET_STRING(iovec[n++], x);
654 } else if (unit_id && !session) {
63c372cb 655 x = strjoina("_SYSTEMD_UNIT=", unit_id);
19cace37
LP
656 IOVEC_SET_STRING(iovec[n++], x);
657 }
658
659 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 660 x = strjoina("_SYSTEMD_USER_UNIT=", t);
ae018d9b 661 free(t);
968f3196 662 IOVEC_SET_STRING(iovec[n++], x);
19cace37 663 } else if (unit_id && session) {
63c372cb 664 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
19cace37
LP
665 IOVEC_SET_STRING(iovec[n++], x);
666 }
ae018d9b 667
0a244b8e 668 if (cg_path_get_slice(c, &t) >= 0) {
63c372cb 669 x = strjoina("_SYSTEMD_SLICE=", t);
0a244b8e
LP
670 free(t);
671 IOVEC_SET_STRING(iovec[n++], x);
672 }
673
ae018d9b 674 free(c);
2d43b190 675 } else if (unit_id) {
63c372cb 676 x = strjoina("_SYSTEMD_UNIT=", unit_id);
2d43b190 677 IOVEC_SET_STRING(iovec[n++], x);
ef1673d1 678 }
d025f1e4 679
d025f1e4 680#ifdef HAVE_SELINUX
6baa7db0 681 if (mac_selinux_use()) {
d682b3a7 682 if (label) {
f8294e41 683 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
ae018d9b 684
d682b3a7
LP
685 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
686 IOVEC_SET_STRING(iovec[n++], x);
687 } else {
688 security_context_t con;
d025f1e4 689
d682b3a7 690 if (getpidcon(ucred->pid, &con) >= 0) {
63c372cb 691 x = strjoina("_SELINUX_CONTEXT=", con);
e7ff4e7f 692
d682b3a7
LP
693 freecon(con);
694 IOVEC_SET_STRING(iovec[n++], x);
695 }
d025f1e4
ZJS
696 }
697 }
698#endif
699 }
968f3196
ZJS
700 assert(n <= m);
701
702 if (object_pid) {
703 r = get_process_uid(object_pid, &object_uid);
704 if (r >= 0) {
de0671ee 705 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
968f3196
ZJS
706 IOVEC_SET_STRING(iovec[n++], o_uid);
707 }
708
709 r = get_process_gid(object_pid, &object_gid);
710 if (r >= 0) {
de0671ee 711 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
968f3196
ZJS
712 IOVEC_SET_STRING(iovec[n++], o_gid);
713 }
714
715 r = get_process_comm(object_pid, &t);
716 if (r >= 0) {
63c372cb 717 x = strjoina("OBJECT_COMM=", t);
968f3196
ZJS
718 free(t);
719 IOVEC_SET_STRING(iovec[n++], x);
720 }
721
722 r = get_process_exe(object_pid, &t);
723 if (r >= 0) {
63c372cb 724 x = strjoina("OBJECT_EXE=", t);
968f3196
ZJS
725 free(t);
726 IOVEC_SET_STRING(iovec[n++], x);
727 }
728
729 r = get_process_cmdline(object_pid, 0, false, &t);
730 if (r >= 0) {
63c372cb 731 x = strjoina("OBJECT_CMDLINE=", t);
968f3196
ZJS
732 free(t);
733 IOVEC_SET_STRING(iovec[n++], x);
734 }
735
736#ifdef HAVE_AUDIT
737 r = audit_session_from_pid(object_pid, &audit);
738 if (r >= 0) {
de0671ee 739 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
968f3196
ZJS
740 IOVEC_SET_STRING(iovec[n++], o_audit_session);
741 }
742
743 r = audit_loginuid_from_pid(object_pid, &loginuid);
744 if (r >= 0) {
de0671ee 745 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
968f3196
ZJS
746 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
747 }
748#endif
749
e9174f29 750 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
968f3196 751 if (r >= 0) {
63c372cb 752 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
968f3196
ZJS
753 IOVEC_SET_STRING(iovec[n++], x);
754
755 r = cg_path_get_session(c, &t);
756 if (r >= 0) {
63c372cb 757 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
968f3196
ZJS
758 free(t);
759 IOVEC_SET_STRING(iovec[n++], x);
760 }
761
762 if (cg_path_get_owner_uid(c, &owner) >= 0) {
de0671ee 763 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
968f3196
ZJS
764 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
765 }
766
767 if (cg_path_get_unit(c, &t) >= 0) {
63c372cb 768 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
968f3196 769 free(t);
19cace37
LP
770 IOVEC_SET_STRING(iovec[n++], x);
771 }
772
773 if (cg_path_get_user_unit(c, &t) >= 0) {
63c372cb 774 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
968f3196 775 free(t);
968f3196 776 IOVEC_SET_STRING(iovec[n++], x);
19cace37 777 }
968f3196
ZJS
778
779 free(c);
780 }
781 }
782 assert(n <= m);
d025f1e4
ZJS
783
784 if (tv) {
ae018d9b 785 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
a5693989 786 IOVEC_SET_STRING(iovec[n++], source_time);
d025f1e4
ZJS
787 }
788
789 /* Note that strictly speaking storing the boot id here is
790 * redundant since the entry includes this in-line
791 * anyway. However, we need this indexed, too. */
0c24bb23
LP
792 if (!isempty(s->boot_id_field))
793 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
d025f1e4 794
0c24bb23
LP
795 if (!isempty(s->machine_id_field))
796 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
d025f1e4 797
0c24bb23
LP
798 if (!isempty(s->hostname_field))
799 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
d025f1e4
ZJS
800
801 assert(n <= m);
802
da499392 803 if (s->split_mode == SPLIT_UID && realuid > 0)
40adcda8 804 /* Split up strictly by any UID */
759c945a 805 journal_uid = realuid;
82499507 806 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
edc3797f
LP
807 /* Split up by login UIDs. We do this only if the
808 * realuid is not root, in order not to accidentally
809 * leak privileged information to the user that is
810 * logged by a privileged process that is part of an
7517e174 811 * unprivileged session. */
8a0889df 812 journal_uid = owner;
da499392
KS
813 else
814 journal_uid = 0;
759c945a 815
d07f7b9e 816 write_to_journal(s, journal_uid, iovec, n, priority);
d025f1e4
ZJS
817}
818
819void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
820 char mid[11 + 32 + 1];
821 char buffer[16 + LINE_MAX + 1];
822 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
823 int n = 0;
824 va_list ap;
b92bea5d 825 struct ucred ucred = {};
d025f1e4
ZJS
826
827 assert(s);
828 assert(format);
829
830 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
831 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
832
833 memcpy(buffer, "MESSAGE=", 8);
834 va_start(ap, format);
835 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
836 va_end(ap);
d025f1e4
ZJS
837 IOVEC_SET_STRING(iovec[n++], buffer);
838
839 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
e2cc6eca 840 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
d025f1e4
ZJS
841 IOVEC_SET_STRING(iovec[n++], mid);
842 }
843
d025f1e4
ZJS
844 ucred.pid = getpid();
845 ucred.uid = getuid();
846 ucred.gid = getgid();
847
d07f7b9e 848 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
d025f1e4
ZJS
849}
850
851void server_dispatch_message(
852 Server *s,
853 struct iovec *iovec, unsigned n, unsigned m,
3b3154df
LP
854 const struct ucred *ucred,
855 const struct timeval *tv,
d025f1e4
ZJS
856 const char *label, size_t label_len,
857 const char *unit_id,
968f3196
ZJS
858 int priority,
859 pid_t object_pid) {
d025f1e4 860
7027ff61 861 int rl, r;
7fd1b19b 862 _cleanup_free_ char *path = NULL;
db91ea32 863 char *c;
d025f1e4
ZJS
864
865 assert(s);
866 assert(iovec || n == 0);
867
868 if (n == 0)
869 return;
870
871 if (LOG_PRI(priority) > s->max_level_store)
872 return;
873
2f5df74a
HHPF
874 /* Stop early in case the information will not be stored
875 * in a journal. */
876 if (s->storage == STORAGE_NONE)
877 return;
878
d025f1e4
ZJS
879 if (!ucred)
880 goto finish;
881
e9174f29 882 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
7027ff61 883 if (r < 0)
d025f1e4
ZJS
884 goto finish;
885
886 /* example: /user/lennart/3/foobar
887 * /system/dbus.service/foobar
888 *
889 * So let's cut of everything past the third /, since that is
890 * where user directories start */
891
892 c = strchr(path, '/');
893 if (c) {
894 c = strchr(c+1, '/');
895 if (c) {
896 c = strchr(c+1, '/');
897 if (c)
898 *c = 0;
899 }
900 }
901
db91ea32 902 rl = journal_rate_limit_test(s->rate_limit, path,
670b110c 903 priority & LOG_PRIMASK, available_space(s, false));
d025f1e4 904
db91ea32 905 if (rl == 0)
d025f1e4 906 return;
d025f1e4
ZJS
907
908 /* Write a suppression message if we suppressed something */
909 if (rl > 1)
db91ea32
ZJS
910 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
911 "Suppressed %u messages from %s", rl - 1, path);
d025f1e4
ZJS
912
913finish:
d07f7b9e 914 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
d025f1e4
ZJS
915}
916
917
caa2f4c0 918static int system_journal_open(Server *s, bool flush_requested) {
d025f1e4
ZJS
919 int r;
920 char *fn;
921 sd_id128_t machine;
922 char ids[33];
923
924 r = sd_id128_get_machine(&machine);
23bbb0de
MS
925 if (r < 0)
926 return log_error_errno(r, "Failed to get machine id: %m");
d025f1e4
ZJS
927
928 sd_id128_to_string(machine, ids);
929
930 if (!s->system_journal &&
931 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
caa2f4c0
ZJS
932 (flush_requested
933 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
d025f1e4
ZJS
934
935 /* If in auto mode: first try to create the machine
936 * path, but not the prefix.
937 *
938 * If in persistent mode: create /var/log/journal and
939 * the machine path */
940
941 if (s->storage == STORAGE_PERSISTENT)
942 (void) mkdir("/var/log/journal/", 0755);
943
63c372cb 944 fn = strjoina("/var/log/journal/", ids);
d025f1e4 945 (void) mkdir(fn, 0755);
d025f1e4 946
63c372cb 947 fn = strjoina(fn, "/system.journal");
d025f1e4 948 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
d025f1e4 949
670b110c 950 if (r >= 0)
d025f1e4 951 server_fix_perms(s, s->system_journal, 0);
433dd100
LN
952 else if (r < 0) {
953 if (r != -ENOENT && r != -EROFS)
da927ba9 954 log_warning_errno(r, "Failed to open system journal: %m");
e40ec7ae 955
433dd100
LN
956 r = 0;
957 }
d025f1e4
ZJS
958 }
959
960 if (!s->runtime_journal &&
961 (s->storage != STORAGE_NONE)) {
962
963 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
964 if (!fn)
965 return -ENOMEM;
966
967 if (s->system_journal) {
968
969 /* Try to open the runtime journal, but only
970 * if it already exists, so that we can flush
971 * it into the system journal */
972
973 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
974 free(fn);
975
976 if (r < 0) {
977 if (r != -ENOENT)
da927ba9 978 log_warning_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
979
980 r = 0;
981 }
982
983 } else {
984
985 /* OK, we really need the runtime journal, so create
986 * it if necessary. */
987
fc1d70af
LP
988 (void) mkdir("/run/log", 0755);
989 (void) mkdir("/run/log/journal", 0755);
990 (void) mkdir_parents(fn, 0750);
991
d025f1e4
ZJS
992 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
993 free(fn);
994
23bbb0de
MS
995 if (r < 0)
996 return log_error_errno(r, "Failed to open runtime journal: %m");
d025f1e4
ZJS
997 }
998
670b110c 999 if (s->runtime_journal)
d025f1e4 1000 server_fix_perms(s, s->runtime_journal, 0);
d025f1e4
ZJS
1001 }
1002
670b110c
ZJS
1003 available_space(s, true);
1004
d025f1e4
ZJS
1005 return r;
1006}
1007
1008int server_flush_to_var(Server *s) {
d025f1e4
ZJS
1009 sd_id128_t machine;
1010 sd_journal *j = NULL;
fbb63411
LP
1011 char ts[FORMAT_TIMESPAN_MAX];
1012 usec_t start;
1013 unsigned n = 0;
1014 int r;
d025f1e4
ZJS
1015
1016 assert(s);
1017
1018 if (s->storage != STORAGE_AUTO &&
1019 s->storage != STORAGE_PERSISTENT)
1020 return 0;
1021
1022 if (!s->runtime_journal)
1023 return 0;
1024
caa2f4c0 1025 system_journal_open(s, true);
d025f1e4
ZJS
1026
1027 if (!s->system_journal)
1028 return 0;
1029
1030 log_debug("Flushing to /var...");
1031
fbb63411
LP
1032 start = now(CLOCK_MONOTONIC);
1033
d025f1e4 1034 r = sd_id128_get_machine(&machine);
00a16861 1035 if (r < 0)
d025f1e4 1036 return r;
d025f1e4
ZJS
1037
1038 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
23bbb0de
MS
1039 if (r < 0)
1040 return log_error_errno(r, "Failed to read runtime journal: %m");
d025f1e4 1041
93b73b06
LP
1042 sd_journal_set_data_threshold(j, 0);
1043
d025f1e4
ZJS
1044 SD_JOURNAL_FOREACH(j) {
1045 Object *o = NULL;
1046 JournalFile *f;
1047
1048 f = j->current_file;
1049 assert(f && f->current_offset > 0);
1050
fbb63411
LP
1051 n++;
1052
d025f1e4
ZJS
1053 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1054 if (r < 0) {
da927ba9 1055 log_error_errno(r, "Can't read entry: %m");
d025f1e4
ZJS
1056 goto finish;
1057 }
1058
1059 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1060 if (r >= 0)
1061 continue;
1062
1063 if (!shall_try_append_again(s->system_journal, r)) {
da927ba9 1064 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1065 goto finish;
1066 }
1067
1068 server_rotate(s);
1069 server_vacuum(s);
1070
253f59df
LP
1071 if (!s->system_journal) {
1072 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1073 r = -EIO;
1074 goto finish;
1075 }
1076
d025f1e4
ZJS
1077 log_debug("Retrying write.");
1078 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1079 if (r < 0) {
da927ba9 1080 log_error_errno(r, "Can't write entry: %m");
d025f1e4
ZJS
1081 goto finish;
1082 }
1083 }
1084
1085finish:
1086 journal_file_post_change(s->system_journal);
1087
1088 journal_file_close(s->runtime_journal);
1089 s->runtime_journal = NULL;
1090
1091 if (r >= 0)
c6878637 1092 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
d025f1e4 1093
763c7aa2 1094 sd_journal_close(j);
d025f1e4 1095
fbb63411
LP
1096 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1097
d025f1e4
ZJS
1098 return r;
1099}
1100
8531ae70 1101int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
f9a810be
LP
1102 Server *s = userdata;
1103
d025f1e4 1104 assert(s);
875c2e22 1105 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
f9a810be
LP
1106
1107 if (revents != EPOLLIN) {
1108 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1109 return -EIO;
1110 }
1111
1112 for (;;) {
1113 struct ucred *ucred = NULL;
1114 struct timeval *tv = NULL;
1115 struct cmsghdr *cmsg;
1116 char *label = NULL;
1117 size_t label_len = 0;
1118 struct iovec iovec;
1119
1120 union {
1121 struct cmsghdr cmsghdr;
1122
bdd13f6b
ZJS
1123 /* We use NAME_MAX space for the SELinux label
1124 * here. The kernel currently enforces no
1125 * limit, but according to suggestions from
1126 * the SELinux people this will change and it
1127 * will probably be identical to NAME_MAX. For
1128 * now we use that, but this should be updated
7517e174 1129 * one day when the final limit is known. */
f9a810be
LP
1130 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1131 CMSG_SPACE(sizeof(struct timeval)) +
1132 CMSG_SPACE(sizeof(int)) + /* fd */
1133 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1134 } control = {};
875c2e22 1135 union sockaddr_union sa = {};
f9a810be
LP
1136 struct msghdr msghdr = {
1137 .msg_iov = &iovec,
1138 .msg_iovlen = 1,
1139 .msg_control = &control,
1140 .msg_controllen = sizeof(control),
875c2e22
LP
1141 .msg_name = &sa,
1142 .msg_namelen = sizeof(sa),
f9a810be 1143 };
d025f1e4 1144
d025f1e4 1145 ssize_t n;
f9a810be
LP
1146 int *fds = NULL;
1147 unsigned n_fds = 0;
875c2e22
LP
1148 int v = 0;
1149 size_t m;
d025f1e4 1150
875c2e22
LP
1151 /* Try to get the right size, if we can. (Not all
1152 * sockets support SIOCINQ, hence we just try, but
1153 * don't rely on it. */
1154 (void) ioctl(fd, SIOCINQ, &v);
1155
7517e174 1156 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
875c2e22
LP
1157 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1158 (size_t) LINE_MAX,
1159 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
d025f1e4 1160
875c2e22 1161 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
f9a810be 1162 return log_oom();
d025f1e4 1163
f9a810be 1164 iovec.iov_base = s->buffer;
875c2e22 1165 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
d025f1e4 1166
f9a810be
LP
1167 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1168 if (n < 0) {
d025f1e4 1169 if (errno == EINTR || errno == EAGAIN)
f9a810be 1170 return 0;
d025f1e4 1171
56f64d95 1172 log_error_errno(errno, "recvmsg() failed: %m");
d025f1e4
ZJS
1173 return -errno;
1174 }
1175
f9a810be
LP
1176 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1177
1178 if (cmsg->cmsg_level == SOL_SOCKET &&
1179 cmsg->cmsg_type == SCM_CREDENTIALS &&
1180 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1181 ucred = (struct ucred*) CMSG_DATA(cmsg);
1182 else if (cmsg->cmsg_level == SOL_SOCKET &&
1183 cmsg->cmsg_type == SCM_SECURITY) {
1184 label = (char*) CMSG_DATA(cmsg);
1185 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1186 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1187 cmsg->cmsg_type == SO_TIMESTAMP &&
1188 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1189 tv = (struct timeval*) CMSG_DATA(cmsg);
1190 else if (cmsg->cmsg_level == SOL_SOCKET &&
1191 cmsg->cmsg_type == SCM_RIGHTS) {
1192 fds = (int*) CMSG_DATA(cmsg);
1193 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1194 }
d025f1e4
ZJS
1195 }
1196
875c2e22
LP
1197 /* And a trailing NUL, just in case */
1198 s->buffer[n] = 0;
1199
f9a810be 1200 if (fd == s->syslog_fd) {
875c2e22 1201 if (n > 0 && n_fds == 0)
f9a810be 1202 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
875c2e22 1203 else if (n_fds > 0)
f9a810be 1204 log_warning("Got file descriptors via syslog socket. Ignoring.");
d025f1e4 1205
875c2e22 1206 } else if (fd == s->native_fd) {
f9a810be
LP
1207 if (n > 0 && n_fds == 0)
1208 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1209 else if (n == 0 && n_fds == 1)
1210 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1211 else if (n_fds > 0)
1212 log_warning("Got too many file descriptors via native socket. Ignoring.");
875c2e22
LP
1213
1214 } else {
1215 assert(fd == s->audit_fd);
1216
1217 if (n > 0 && n_fds == 0)
0b97208d 1218 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
875c2e22
LP
1219 else if (n_fds > 0)
1220 log_warning("Got file descriptors via audit socket. Ignoring.");
d025f1e4
ZJS
1221 }
1222
f9a810be
LP
1223 close_many(fds, n_fds);
1224 }
f9a810be 1225}
d025f1e4 1226
f9a810be
LP
1227static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1228 Server *s = userdata;
d025f1e4 1229
f9a810be 1230 assert(s);
d025f1e4 1231
f9a810be 1232 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
d025f1e4 1233
f9a810be
LP
1234 server_flush_to_var(s);
1235 server_sync(s);
3bfd4e0c 1236 server_vacuum(s);
d025f1e4 1237
74055aa7
LP
1238 touch("/run/systemd/journal/flushed");
1239
f9a810be
LP
1240 return 0;
1241}
d025f1e4 1242
f9a810be
LP
1243static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1244 Server *s = userdata;
d025f1e4 1245
f9a810be 1246 assert(s);
d025f1e4 1247
f9a810be
LP
1248 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1249 server_rotate(s);
1250 server_vacuum(s);
d025f1e4 1251
f9a810be
LP
1252 return 0;
1253}
d025f1e4 1254
f9a810be
LP
1255static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1256 Server *s = userdata;
d025f1e4 1257
f9a810be 1258 assert(s);
d025f1e4 1259
4daf54a8 1260 log_received_signal(LOG_INFO, si);
d025f1e4 1261
6203e07a 1262 sd_event_exit(s->event, 0);
d025f1e4
ZJS
1263 return 0;
1264}
1265
f9a810be 1266static int setup_signals(Server *s) {
d025f1e4 1267 sigset_t mask;
f9a810be 1268 int r;
d025f1e4
ZJS
1269
1270 assert(s);
1271
1272 assert_se(sigemptyset(&mask) == 0);
1273 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1274 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1275
151b9b96 1276 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
f9a810be
LP
1277 if (r < 0)
1278 return r;
1279
151b9b96 1280 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
f9a810be
LP
1281 if (r < 0)
1282 return r;
d025f1e4 1283
151b9b96 1284 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
f9a810be
LP
1285 if (r < 0)
1286 return r;
d025f1e4 1287
151b9b96 1288 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
f9a810be
LP
1289 if (r < 0)
1290 return r;
d025f1e4
ZJS
1291
1292 return 0;
1293}
1294
1295static int server_parse_proc_cmdline(Server *s) {
7fd1b19b 1296 _cleanup_free_ char *line = NULL;
a2a5291b 1297 const char *w, *state;
d025f1e4 1298 size_t l;
74df0fca 1299 int r;
d025f1e4 1300
74df0fca 1301 r = proc_cmdline(&line);
b5884878 1302 if (r < 0) {
da927ba9 1303 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
d025f1e4 1304 return 0;
b5884878 1305 }
d025f1e4
ZJS
1306
1307 FOREACH_WORD_QUOTED(w, l, line, state) {
7fd1b19b 1308 _cleanup_free_ char *word;
d025f1e4
ZJS
1309
1310 word = strndup(w, l);
db91ea32
ZJS
1311 if (!word)
1312 return -ENOMEM;
d025f1e4
ZJS
1313
1314 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1315 r = parse_boolean(word + 35);
1316 if (r < 0)
1317 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1318 else
1319 s->forward_to_syslog = r;
1320 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1321 r = parse_boolean(word + 33);
1322 if (r < 0)
1323 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1324 else
1325 s->forward_to_kmsg = r;
1326 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1327 r = parse_boolean(word + 36);
1328 if (r < 0)
1329 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1330 else
1331 s->forward_to_console = r;
40b71e89
ST
1332 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1333 r = parse_boolean(word + 33);
1334 if (r < 0)
1335 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1336 else
1337 s->forward_to_wall = r;
d025f1e4
ZJS
1338 } else if (startswith(word, "systemd.journald"))
1339 log_warning("Invalid systemd.journald parameter. Ignoring.");
d025f1e4 1340 }
b2fadec6 1341 /* do not warn about state here, since probably systemd already did */
d025f1e4 1342
db91ea32 1343 return 0;
d025f1e4
ZJS
1344}
1345
1346static int server_parse_config_file(Server *s) {
d025f1e4
ZJS
1347 assert(s);
1348
a9edaeff
JT
1349 return config_parse_many("/etc/systemd/journald.conf",
1350 CONF_DIRS_NULSTR("systemd/journald.conf"),
1351 "Journal\0",
1352 config_item_perf_lookup, journald_gperf_lookup,
1353 false, s);
d025f1e4
ZJS
1354}
1355
f9a810be
LP
1356static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1357 Server *s = userdata;
26687bf8
OS
1358
1359 assert(s);
1360
f9a810be 1361 server_sync(s);
26687bf8
OS
1362 return 0;
1363}
1364
d07f7b9e 1365int server_schedule_sync(Server *s, int priority) {
26687bf8
OS
1366 int r;
1367
26687bf8
OS
1368 assert(s);
1369
d07f7b9e
LP
1370 if (priority <= LOG_CRIT) {
1371 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1372 server_sync(s);
1373 return 0;
1374 }
1375
26687bf8
OS
1376 if (s->sync_scheduled)
1377 return 0;
1378
f9a810be
LP
1379 if (s->sync_interval_usec > 0) {
1380 usec_t when;
ca267016 1381
6a0f1f6d 1382 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
f9a810be
LP
1383 if (r < 0)
1384 return r;
26687bf8 1385
f9a810be
LP
1386 when += s->sync_interval_usec;
1387
1388 if (!s->sync_event_source) {
6a0f1f6d
LP
1389 r = sd_event_add_time(
1390 s->event,
1391 &s->sync_event_source,
1392 CLOCK_MONOTONIC,
1393 when, 0,
1394 server_dispatch_sync, s);
f9a810be
LP
1395 if (r < 0)
1396 return r;
1397
1398 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1399 } else {
1400 r = sd_event_source_set_time(s->sync_event_source, when);
1401 if (r < 0)
1402 return r;
1403
1404 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1405 }
26687bf8 1406 if (r < 0)
f9a810be 1407 return r;
26687bf8 1408
f9a810be
LP
1409 s->sync_scheduled = true;
1410 }
26687bf8
OS
1411
1412 return 0;
1413}
1414
0c24bb23
LP
1415static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1416 Server *s = userdata;
1417
1418 assert(s);
1419
1420 server_cache_hostname(s);
1421 return 0;
1422}
1423
1424static int server_open_hostname(Server *s) {
1425 int r;
1426
1427 assert(s);
1428
1429 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
4a62c710
MS
1430 if (s->hostname_fd < 0)
1431 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
0c24bb23 1432
151b9b96 1433 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
0c24bb23 1434 if (r < 0) {
28def94c
DR
1435 /* kernels prior to 3.2 don't support polling this file. Ignore
1436 * the failure. */
1437 if (r == -EPERM) {
1438 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1439 strerror(-r));
03e334a1 1440 s->hostname_fd = safe_close(s->hostname_fd);
28def94c
DR
1441 return 0;
1442 }
1443
23bbb0de 1444 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
0c24bb23
LP
1445 }
1446
1447 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
23bbb0de
MS
1448 if (r < 0)
1449 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
0c24bb23
LP
1450
1451 return 0;
1452}
1453
d025f1e4 1454int server_init(Server *s) {
13790add 1455 _cleanup_fdset_free_ FDSet *fds = NULL;
d025f1e4
ZJS
1456 int n, r, fd;
1457
1458 assert(s);
1459
1460 zero(*s);
875c2e22 1461 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
d025f1e4
ZJS
1462 s->compress = true;
1463 s->seal = true;
1464
26687bf8
OS
1465 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1466 s->sync_scheduled = false;
1467
d025f1e4
ZJS
1468 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1469 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1470
40b71e89 1471 s->forward_to_wall = true;
d025f1e4 1472
e150e820
MB
1473 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1474
d025f1e4
ZJS
1475 s->max_level_store = LOG_DEBUG;
1476 s->max_level_syslog = LOG_DEBUG;
1477 s->max_level_kmsg = LOG_NOTICE;
1478 s->max_level_console = LOG_INFO;
40b71e89 1479 s->max_level_wall = LOG_EMERG;
d025f1e4
ZJS
1480
1481 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1482 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1483
1484 server_parse_config_file(s);
1485 server_parse_proc_cmdline(s);
d288f79f 1486 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
b1389b0d
ZJS
1487 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1488 s->rate_limit_interval, s->rate_limit_burst);
d288f79f
ZJS
1489 s->rate_limit_interval = s->rate_limit_burst = 0;
1490 }
d025f1e4
ZJS
1491
1492 mkdir_p("/run/systemd/journal", 0755);
1493
43cf8388 1494 s->user_journals = ordered_hashmap_new(NULL);
d025f1e4
ZJS
1495 if (!s->user_journals)
1496 return log_oom();
1497
1498 s->mmap = mmap_cache_new();
1499 if (!s->mmap)
1500 return log_oom();
1501
f9a810be 1502 r = sd_event_default(&s->event);
23bbb0de
MS
1503 if (r < 0)
1504 return log_error_errno(r, "Failed to create event loop: %m");
d025f1e4 1505
f9a810be
LP
1506 sd_event_set_watchdog(s->event, true);
1507
d025f1e4 1508 n = sd_listen_fds(true);
23bbb0de
MS
1509 if (n < 0)
1510 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
d025f1e4
ZJS
1511
1512 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1513
1514 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1515
1516 if (s->native_fd >= 0) {
1517 log_error("Too many native sockets passed.");
1518 return -EINVAL;
1519 }
1520
1521 s->native_fd = fd;
1522
1523 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1524
1525 if (s->stdout_fd >= 0) {
1526 log_error("Too many stdout sockets passed.");
1527 return -EINVAL;
1528 }
1529
1530 s->stdout_fd = fd;
1531
03ee5c38
LP
1532 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1533 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
d025f1e4
ZJS
1534
1535 if (s->syslog_fd >= 0) {
1536 log_error("Too many /dev/log sockets passed.");
1537 return -EINVAL;
1538 }
1539
1540 s->syslog_fd = fd;
1541
875c2e22
LP
1542 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1543
1544 if (s->audit_fd >= 0) {
1545 log_error("Too many audit sockets passed.");
1546 return -EINVAL;
1547 }
1548
1549 s->audit_fd = fd;
1550
4ec3cd73 1551 } else {
4ec3cd73 1552
13790add
LP
1553 if (!fds) {
1554 fds = fdset_new();
1555 if (!fds)
1556 return log_oom();
1557 }
4ec3cd73 1558
13790add
LP
1559 r = fdset_put(fds, fd);
1560 if (r < 0)
1561 return log_oom();
4ec3cd73 1562 }
d025f1e4
ZJS
1563 }
1564
13790add 1565 r = server_open_stdout_socket(s, fds);
d025f1e4
ZJS
1566 if (r < 0)
1567 return r;
1568
13790add
LP
1569 if (fdset_size(fds) > 0) {
1570 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1571 fds = fdset_free(fds);
1572 }
1573
1574 r = server_open_syslog_socket(s);
d025f1e4
ZJS
1575 if (r < 0)
1576 return r;
1577
13790add 1578 r = server_open_native_socket(s);
d025f1e4
ZJS
1579 if (r < 0)
1580 return r;
1581
1582 r = server_open_dev_kmsg(s);
1583 if (r < 0)
1584 return r;
1585
875c2e22
LP
1586 r = server_open_audit(s);
1587 if (r < 0)
1588 return r;
1589
d025f1e4
ZJS
1590 r = server_open_kernel_seqnum(s);
1591 if (r < 0)
1592 return r;
1593
0c24bb23
LP
1594 r = server_open_hostname(s);
1595 if (r < 0)
1596 return r;
1597
f9a810be 1598 r = setup_signals(s);
d025f1e4
ZJS
1599 if (r < 0)
1600 return r;
1601
1602 s->udev = udev_new();
1603 if (!s->udev)
1604 return -ENOMEM;
1605
f9a810be 1606 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
d025f1e4
ZJS
1607 if (!s->rate_limit)
1608 return -ENOMEM;
1609
e9174f29
LP
1610 r = cg_get_root_path(&s->cgroup_root);
1611 if (r < 0)
1612 return r;
1613
0c24bb23
LP
1614 server_cache_hostname(s);
1615 server_cache_boot_id(s);
1616 server_cache_machine_id(s);
1617
caa2f4c0 1618 r = system_journal_open(s, false);
d025f1e4
ZJS
1619 if (r < 0)
1620 return r;
1621
1622 return 0;
1623}
1624
1625void server_maybe_append_tags(Server *s) {
1626#ifdef HAVE_GCRYPT
1627 JournalFile *f;
1628 Iterator i;
1629 usec_t n;
1630
1631 n = now(CLOCK_REALTIME);
1632
1633 if (s->system_journal)
1634 journal_file_maybe_append_tag(s->system_journal, n);
1635
43cf8388 1636 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
d025f1e4
ZJS
1637 journal_file_maybe_append_tag(f, n);
1638#endif
1639}
1640
1641void server_done(Server *s) {
1642 JournalFile *f;
1643 assert(s);
1644
1645 while (s->stdout_streams)
1646 stdout_stream_free(s->stdout_streams);
1647
1648 if (s->system_journal)
1649 journal_file_close(s->system_journal);
1650
1651 if (s->runtime_journal)
1652 journal_file_close(s->runtime_journal);
1653
43cf8388 1654 while ((f = ordered_hashmap_steal_first(s->user_journals)))
d025f1e4
ZJS
1655 journal_file_close(f);
1656
43cf8388 1657 ordered_hashmap_free(s->user_journals);
d025f1e4 1658
f9a810be
LP
1659 sd_event_source_unref(s->syslog_event_source);
1660 sd_event_source_unref(s->native_event_source);
1661 sd_event_source_unref(s->stdout_event_source);
1662 sd_event_source_unref(s->dev_kmsg_event_source);
875c2e22 1663 sd_event_source_unref(s->audit_event_source);
f9a810be
LP
1664 sd_event_source_unref(s->sync_event_source);
1665 sd_event_source_unref(s->sigusr1_event_source);
1666 sd_event_source_unref(s->sigusr2_event_source);
1667 sd_event_source_unref(s->sigterm_event_source);
1668 sd_event_source_unref(s->sigint_event_source);
0c24bb23 1669 sd_event_source_unref(s->hostname_event_source);
f9a810be 1670 sd_event_unref(s->event);
d025f1e4 1671
03e334a1
LP
1672 safe_close(s->syslog_fd);
1673 safe_close(s->native_fd);
1674 safe_close(s->stdout_fd);
1675 safe_close(s->dev_kmsg_fd);
875c2e22 1676 safe_close(s->audit_fd);
03e334a1 1677 safe_close(s->hostname_fd);
0c24bb23 1678
d025f1e4
ZJS
1679 if (s->rate_limit)
1680 journal_rate_limit_free(s->rate_limit);
1681
1682 if (s->kernel_seqnum)
1683 munmap(s->kernel_seqnum, sizeof(uint64_t));
1684
1685 free(s->buffer);
1686 free(s->tty_path);
e9174f29 1687 free(s->cgroup_root);
99d0966e 1688 free(s->hostname_field);
d025f1e4
ZJS
1689
1690 if (s->mmap)
1691 mmap_cache_unref(s->mmap);
1692
1693 if (s->udev)
1694 udev_unref(s->udev);
1695}