]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-native.c
journal: Serialize __MONOTONIC_TIMESTAMP metadata field as well
[thirdparty/systemd.git] / src / journal / journald-native.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
0153028a 2
4871690d 3#include <stddef.h>
0153028a 4#include <sys/epoll.h>
c79e98ea 5#include <sys/mman.h>
1e603a48 6#include <sys/statvfs.h>
07630cea 7#include <unistd.h>
0153028a 8
b5efdb8a 9#include "alloc-util.h"
3ffd4af2 10#include "fd-util.h"
f4f15635 11#include "fs-util.h"
bd1ae178 12#include "iovec-util.h"
b18453ed 13#include "journal-importer.h"
d9799ea2 14#include "journal-internal.h"
53978b98 15#include "journal-util.h"
87a13dab 16#include "journald-client.h"
0153028a 17#include "journald-console.h"
07630cea 18#include "journald-kmsg.h"
3ffd4af2 19#include "journald-native.h"
07630cea 20#include "journald-server.h"
0153028a 21#include "journald-syslog.h"
40b71e89 22#include "journald-wall.h"
a09abc4a 23#include "memfd-util.h"
0a970718 24#include "memory-util.h"
6bedfcbb 25#include "parse-util.h"
07630cea 26#include "path-util.h"
22e3a02b 27#include "process-util.h"
07630cea
LP
28#include "selinux-util.h"
29#include "socket-util.h"
30#include "string-util.h"
da9fc98d 31#include "strv.h"
731e10f3 32#include "unaligned.h"
0153028a 33
/* Tells whether a client with the given credentials may set OBJECT_PID= on its entries: the credentials
 * must be known and must carry UID 0. */
static bool allow_object_pid(const struct ucred *ucred) {
        if (!ucred)
                return false;

        return ucred->uid == 0;
}
37
4b29a7f4
ZJS
38static void server_process_entry_meta(
39 const char *p, size_t l,
40 const struct ucred *ucred,
41 int *priority,
42 char **identifier,
43 char **message,
44 pid_t *object_pid) {
45
46 /* We need to determine the priority of this entry for the rate limiting logic */
47
48 if (l == 10 &&
49 startswith(p, "PRIORITY=") &&
50 p[9] >= '0' && p[9] <= '9')
51 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
52
53 else if (l == 17 &&
54 startswith(p, "SYSLOG_FACILITY=") &&
55 p[16] >= '0' && p[16] <= '9')
56 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
57
58 else if (l == 18 &&
59 startswith(p, "SYSLOG_FACILITY=") &&
60 p[16] >= '0' && p[16] <= '9' &&
61 p[17] >= '0' && p[17] <= '9')
62 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
63
64 else if (l >= 19 &&
65 startswith(p, "SYSLOG_IDENTIFIER=")) {
66 char *t;
67
d9adc8a8 68 t = memdup_suffix0(p + 18, l - 18);
d6f2cd67
DT
69 if (t)
70 free_and_replace(*identifier, t);
4b29a7f4
ZJS
71
72 } else if (l >= 8 &&
73 startswith(p, "MESSAGE=")) {
74 char *t;
75
d9adc8a8 76 t = memdup_suffix0(p + 8, l - 8);
d6f2cd67
DT
77 if (t)
78 free_and_replace(*message, t);
4b29a7f4 79
fbd0b64f
LP
80 } else if (l > STRLEN("OBJECT_PID=") &&
81 l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
4b29a7f4
ZJS
82 startswith(p, "OBJECT_PID=") &&
83 allow_object_pid(ucred)) {
84 char buf[DECIMAL_STR_MAX(pid_t)];
fbd0b64f
LP
85 memcpy(buf, p + STRLEN("OBJECT_PID="),
86 l - STRLEN("OBJECT_PID="));
87 buf[l-STRLEN("OBJECT_PID=")] = '\0';
4b29a7f4
ZJS
88
89 (void) parse_pid(buf, object_pid);
90 }
91}
92
68944f19 93static int server_process_entry(
0153028a 94 Server *s,
68944f19 95 const void *buffer, size_t *remaining,
22e3a02b 96 ClientContext *context,
3b3154df
LP
97 const struct ucred *ucred,
98 const struct timeval *tv,
0153028a
LP
99 const char *label, size_t label_len) {
100
d3070fbd
LP
101 /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
102 * processing should continue, and a negative or positive value otherwise.
68944f19
ZJS
103 *
104 * Note that *remaining is altered on both success and failure. */
105
319a4f4b 106 size_t n = 0, j, tn = SIZE_MAX, entry_size = 0;
d3070fbd 107 char *identifier = NULL, *message = NULL;
0153028a 108 struct iovec *iovec = NULL;
0153028a 109 int priority = LOG_INFO;
968f3196 110 pid_t object_pid = 0;
d3070fbd 111 const char *p;
964ef920 112 int r = 1;
0153028a
LP
113
114 p = buffer;
0153028a 115
68944f19 116 while (*remaining > 0) {
0153028a
LP
117 const char *e, *q;
118
68944f19 119 e = memchr(p, '\n', *remaining);
0153028a
LP
120
121 if (!e) {
122 /* Trailing noise, let's ignore it, and flush what we collected */
f9fbac8b 123 log_debug("Received message with trailing noise, ignoring.");
964ef920 124 break; /* finish processing of the message */
0153028a
LP
125 }
126
127 if (e == p) {
128 /* Entry separator */
68944f19
ZJS
129 *remaining -= 1;
130 break;
0153028a
LP
131 }
132
4c701096 133 if (IN_SET(*p, '.', '#')) {
964ef920 134 /* Ignore control commands for now, and comments too. */
68944f19 135 *remaining -= (e - p) + 1;
0153028a
LP
136 p = e + 1;
137 continue;
138 }
139
140 /* A property follows */
052c57f1 141 if (n > ENTRY_FIELD_COUNT_MAX) {
f9fbac8b 142 log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
052c57f1
ZJS
143 goto finish;
144 }
0153028a 145
12a717f8 146 /* n existing properties, 1 new, +1 for _TRANSPORT */
319a4f4b 147 if (!GREEDY_REALLOC(iovec,
d3070fbd
LP
148 n + 2 +
149 N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
150 client_context_extra_fields_n_iovec(context))) {
68944f19 151 r = log_oom();
964ef920 152 goto finish;
0153028a
LP
153 }
154
155 q = memchr(p, '=', e - p);
156 if (q) {
53978b98 157 if (journal_field_valid(p, q - p, false)) {
0153028a
LP
158 size_t l;
159
160 l = e - p;
964ef920 161 if (l > DATA_SIZE_MAX) {
f9fbac8b 162 log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
964ef920
ZJS
163 goto finish;
164 }
165
166 if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
f9fbac8b
DDM
167 log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
168 entry_size + l, n + 1);
964ef920
ZJS
169 goto finish;
170 }
0153028a 171
dde26374
LP
172 /* If the field name starts with an underscore, skip the variable, since that indicates
173 * a trusted field */
174 iovec[n++] = IOVEC_MAKE((char*) p, l);
68944f19 175 entry_size += l;
0153028a 176
4b29a7f4
ZJS
177 server_process_entry_meta(p, l, ucred,
178 &priority,
179 &identifier,
180 &message,
181 &object_pid);
0153028a
LP
182 }
183
68944f19 184 *remaining -= (e - p) + 1;
0153028a
LP
185 p = e + 1;
186 continue;
187 } else {
964ef920 188 uint64_t l, total;
0153028a
LP
189 char *k;
190
68944f19 191 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
f9fbac8b 192 log_debug("Failed to parse message, ignoring.");
0153028a
LP
193 break;
194 }
195
731e10f3 196 l = unaligned_read_le64(e + 1);
505b6a61 197 if (l > DATA_SIZE_MAX) {
f9fbac8b 198 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
964ef920
ZJS
199 goto finish;
200 }
201
202 total = (e - p) + 1 + l;
203 if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
f9fbac8b
DDM
204 log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.",
205 entry_size + total, n + 1);
964ef920 206 goto finish;
505b6a61
LP
207 }
208
68944f19 209 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
0153028a 210 e[1+sizeof(uint64_t)+l] != '\n') {
f9fbac8b 211 log_debug("Failed to parse message, ignoring.");
0153028a
LP
212 break;
213 }
214
964ef920 215 k = malloc(total);
0153028a
LP
216 if (!k) {
217 log_oom();
218 break;
219 }
220
221 memcpy(k, p, e - p);
222 k[e - p] = '=';
223 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
224
53978b98 225 if (journal_field_valid(p, e - p, false)) {
5cfa2c3d 226 iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l);
874bc134 227 entry_size += iovec[n].iov_len;
a174f94d 228 n++;
4b29a7f4
ZJS
229
230 server_process_entry_meta(k, (e - p) + 1 + l, ucred,
231 &priority,
232 &identifier,
233 &message,
234 &object_pid);
0153028a
LP
235 } else
236 free(k);
237
68944f19 238 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
0153028a
LP
239 p = e + 1 + sizeof(uint64_t) + l + 1;
240 }
241 }
242
964ef920 243 if (n <= 0)
0153028a 244 goto finish;
d3070fbd 245
0153028a 246 tn = n++;
e6a7ec4b 247 iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
fbd0b64f 248 entry_size += STRLEN("_TRANSPORT=journal");
874bc134
ZJS
249
250 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
f9fbac8b 251 log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
874bc134
ZJS
252 goto finish;
253 }
0153028a 254
964ef920
ZJS
255 r = 0; /* Success, we read the message. */
256
257 if (!client_context_test_priority(context, priority))
258 goto finish;
259
0153028a 260 if (message) {
87a13dab
QD
261 /* Ensure message is not NULL, otherwise strlen(message) would crash. This check needs to
262 * be here until server_process_entry() is able to process messages containing \0 characters,
263 * as we would have access to the actual size of message. */
264 r = client_context_check_keep_log(context, message, strlen(message));
265 if (r <= 0)
266 goto finish;
267
0153028a 268 if (s->forward_to_syslog)
b6a20306 269 server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
0153028a
LP
270
271 if (s->forward_to_kmsg)
272 server_forward_kmsg(s, priority, identifier, message, ucred);
273
274 if (s->forward_to_console)
275 server_forward_console(s, priority, identifier, message, ucred);
40b71e89
ST
276
277 if (s->forward_to_wall)
278 server_forward_wall(s, priority, identifier, message, ucred);
0153028a
LP
279 }
280
319a4f4b 281 server_dispatch_message(s, iovec, n, MALLOC_ELEMENTSOF(iovec), context, tv, priority, object_pid);
0153028a
LP
282
283finish:
284 for (j = 0; j < n; j++) {
285 if (j == tn)
286 continue;
287
288 if (iovec[j].iov_base < buffer ||
68944f19 289 (const char*) iovec[j].iov_base >= p + *remaining)
0153028a
LP
290 free(iovec[j].iov_base);
291 }
292
293 free(iovec);
294 free(identifier);
295 free(message);
68944f19
ZJS
296
297 return r;
298}
299
300void server_process_native_message(
301 Server *s,
21acb27b 302 const char *buffer, size_t buffer_size,
68944f19
ZJS
303 const struct ucred *ucred,
304 const struct timeval *tv,
305 const char *label, size_t label_len) {
306
68944f19 307 size_t remaining = buffer_size;
1d3e682e 308 ClientContext *context = NULL;
22e3a02b 309 int r;
68944f19
ZJS
310
311 assert(s);
312 assert(buffer || buffer_size == 0);
313
22e3a02b
LP
314 if (ucred && pid_is_valid(ucred->pid)) {
315 r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
316 if (r < 0)
d9799ea2 317 log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT,
8522691d
DDM
318 "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m",
319 ucred->pid);
22e3a02b
LP
320 }
321
68944f19
ZJS
322 do {
323 r = server_process_entry(s,
324 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
22e3a02b 325 context, ucred, tv, label, label_len);
68944f19 326 } while (r == 0);
0153028a
LP
327}
328
f5cbe313 329int server_process_native_file(
0153028a
LP
330 Server *s,
331 int fd,
3b3154df
LP
332 const struct ucred *ucred,
333 const struct timeval *tv,
0153028a
LP
334 const char *label, size_t label_len) {
335
336 struct stat st;
c79e98ea 337 bool sealed;
1dfa7e79 338 int r;
0153028a 339
964ef920 340 /* Data is in the passed fd, probably it didn't fit in a datagram. */
c79e98ea 341
0153028a
LP
342 assert(s);
343 assert(fd >= 0);
344
f5cbe313
MY
345 if (fstat(fd, &st) < 0)
346 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
347 "Failed to stat passed file: %m");
1f47e27a
LP
348
349 r = stat_verify_regular(&st);
f5cbe313
MY
350 if (r < 0)
351 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
352 "File passed is not regular, ignoring message: %m");
1f47e27a
LP
353
354 if (st.st_size <= 0)
f5cbe313 355 return 0;
1f47e27a 356
14f38d17
MY
357 r = fd_verify_safe_flags(fd);
358 if (r == -EREMOTEIO)
f5cbe313
MY
359 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
360 "Unexpected flags of passed memory fd, ignoring message.");
14f38d17 361 if (r < 0)
f5cbe313
MY
362 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
363 "Failed to get flags of passed file: %m");
1f47e27a
LP
364
365 /* If it's a memfd, check if it is sealed. If so, we can just mmap it and use it, and do not need to
366 * copy the data out. */
73843b52 367 sealed = memfd_get_sealed(fd) > 0;
c79e98ea
LP
368
369 if (!sealed && (!ucred || ucred->uid != 0)) {
cf852ff6 370 _cleanup_free_ char *k = NULL;
1dfa7e79
LP
371 const char *e;
372
296de386
LP
373 /* If this is not a sealed memfd, and the peer is unknown or unprivileged, then verify the
374 * path. */
c79e98ea 375
cf852ff6 376 r = fd_get_path(fd, &k);
f5cbe313
MY
377 if (r < 0)
378 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
379 "Failed to get path of passed fd: %m");
1dfa7e79 380
da9fc98d 381 e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/");
f5cbe313
MY
382 if (!e)
383 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
384 "Received file outside of allowed directories, refusing.");
1dfa7e79 385
f5cbe313
MY
386 if (!filename_is_valid(e))
387 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
388 "Received file in subdirectory of allowed directories, refusing.");
1dfa7e79
LP
389 }
390
1f47e27a
LP
391 /* When !sealed, set a lower memory limit. We have to read the file, effectively doubling memory
392 * use. */
f5cbe313
MY
393 if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2))
394 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EFBIG), JOURNAL_LOG_RATELIMIT,
395 "File passed too large (%"PRIu64" bytes), refusing.",
396 (uint64_t) st.st_size);
0153028a 397
c79e98ea
LP
398 if (sealed) {
399 void *p;
400 size_t ps;
401
402 /* The file is sealed, we can just map it and use it. */
0153028a 403
c79e98ea 404 ps = PAGE_ALIGN(st.st_size);
cbdac0c3 405 assert(ps < SIZE_MAX);
c79e98ea 406 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
f5cbe313
MY
407 if (p == MAP_FAILED)
408 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
409 "Failed to map memfd: %m");
c79e98ea
LP
410
411 server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
412 assert_se(munmap(p, ps) >= 0);
1e603a48 413
f5cbe313
MY
414 return 0;
415 }
1e603a48 416
f5cbe313
MY
417 _cleanup_free_ void *p = NULL;
418 struct statvfs vfs;
419 ssize_t n;
420
421 if (fstatvfs(fd, &vfs) < 0)
422 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
423 "Failed to stat file system of passed file: %m");
424
425 /* Refuse operating on file systems that have mandatory locking enabled.
426 * See also: https://github.com/systemd/systemd/issues/1822 */
427 if (FLAGS_SET(vfs.f_flag, ST_MANDLOCK))
428 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
429 "Received file descriptor from file system with mandatory locking enabled, not processing it.");
430
431 /* Make the fd non-blocking. On regular files this has the effect of bypassing mandatory
432 * locking. Of course, this should normally not be necessary given the check above, but let's
433 * better be safe than sorry, after all NFS is pretty confusing regarding file system flags,
434 * and we better don't trust it, and so is SMB. */
435 r = fd_nonblock(fd, true);
436 if (r < 0)
437 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
438 "Failed to make fd non-blocking: %m");
1e603a48 439
f5cbe313
MY
440 /* The file is not sealed, we can't map the file here, since clients might then truncate it
441 * and trigger a SIGBUS for us. So let's stupidly read it. */
c79e98ea 442
f5cbe313
MY
443 p = malloc(st.st_size);
444 if (!p)
445 return log_oom();
c79e98ea 446
f5cbe313
MY
447 n = pread(fd, p, st.st_size, 0);
448 if (n < 0)
449 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
450 "Failed to read file: %m");
451 if (n > 0)
452 server_process_native_message(s, p, n, ucred, tv, label, label_len);
453
454 return 0;
0153028a
LP
455}
456
b1852c48 457int server_open_native_socket(Server *s, const char *native_socket) {
3b3154df 458 int r;
0153028a
LP
459
460 assert(s);
b1852c48 461 assert(native_socket);
0153028a
LP
462
463 if (s->native_fd < 0) {
f36a9d59
ZJS
464 union sockaddr_union sa;
465 size_t sa_len;
b1852c48
LP
466
467 r = sockaddr_un_set_path(&sa.un, native_socket);
468 if (r < 0)
469 return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", native_socket);
f36a9d59 470 sa_len = r;
b1852c48 471
0153028a 472 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
4a62c710
MS
473 if (s->native_fd < 0)
474 return log_error_errno(errno, "socket() failed: %m");
0153028a 475
155b6876 476 (void) sockaddr_un_unlink(&sa.un);
0153028a 477
f36a9d59 478 r = bind(s->native_fd, &sa.sa, sa_len);
4a62c710
MS
479 if (r < 0)
480 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
0153028a 481
4a61c3e5 482 (void) chmod(sa.un.sun_path, 0666);
0153028a 483 } else
48440643 484 (void) fd_nonblock(s->native_fd, true);
0153028a 485
2ff48e98 486 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true);
4a62c710 487 if (r < 0)
2ff48e98 488 return log_error_errno(r, "SO_PASSCRED failed: %m");
0153028a 489
6d395665 490 if (mac_selinux_use()) {
2ff48e98 491 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true);
d682b3a7 492 if (r < 0)
2ff48e98 493 log_warning_errno(r, "SO_PASSSEC failed: %m");
d682b3a7 494 }
0153028a 495
2ff48e98 496 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
4a62c710 497 if (r < 0)
2ff48e98 498 return log_error_errno(r, "SO_TIMESTAMP failed: %m");
0153028a 499
8531ae70 500 r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
23bbb0de
MS
501 if (r < 0)
502 return log_error_errno(r, "Failed to add native server fd to event loop: %m");
0153028a 503
48cef295
VC
504 r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
505 if (r < 0)
506 return log_error_errno(r, "Failed to adjust native event source priority: %m");
507
0153028a
LP
508 return 0;
509}