git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-native.c
Add systemd-analyze verb to list runtime unit properties (#37665)
[thirdparty/systemd.git] / src / journal / journald-native.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/statvfs.h>
4 #include <unistd.h>
5
6 #include "sd-event.h"
7
8 #include "alloc-util.h"
9 #include "errno-util.h"
10 #include "fd-util.h"
11 #include "format-util.h"
12 #include "iovec-util.h"
13 #include "journal-importer.h"
14 #include "journal-internal.h"
15 #include "journald-client.h"
16 #include "journald-console.h"
17 #include "journald-context.h"
18 #include "journald-kmsg.h"
19 #include "journald-manager.h"
20 #include "journald-native.h"
21 #include "journald-syslog.h"
22 #include "journald-wall.h"
23 #include "log.h"
24 #include "log-ratelimit.h"
25 #include "memfd-util.h"
26 #include "memory-util.h"
27 #include "parse-util.h"
28 #include "path-util.h"
29 #include "process-util.h"
30 #include "selinux-util.h"
31 #include "socket-util.h"
32 #include "stat-util.h"
33 #include "string-util.h"
34 #include "unaligned.h"
35
36 static bool allow_object_pid(const struct ucred *ucred) {
37 return ucred && ucred->uid == 0;
38 }
39
40 static void manager_process_entry_meta(
41 const char *p, size_t l,
42 const struct ucred *ucred,
43 int *priority,
44 char **identifier,
45 char **message,
46 pid_t *object_pid) {
47
48 /* We need to determine the priority of this entry for the rate limiting logic */
49
50 if (l == 10 &&
51 startswith(p, "PRIORITY=") &&
52 p[9] >= '0' && p[9] <= '9')
53 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
54
55 else if (l == 17 &&
56 startswith(p, "SYSLOG_FACILITY=") &&
57 p[16] >= '0' && p[16] <= '9')
58 *priority = LOG_PRI(*priority) | ((p[16] - '0') << 3);
59
60 else if (l == 18 &&
61 startswith(p, "SYSLOG_FACILITY=") &&
62 p[16] >= '0' && p[16] <= '9' &&
63 p[17] >= '0' && p[17] <= '9')
64 *priority = LOG_PRI(*priority) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
65
66 else if (l >= 19 &&
67 startswith(p, "SYSLOG_IDENTIFIER=")) {
68 char *t;
69
70 t = memdup_suffix0(p + 18, l - 18);
71 if (t)
72 free_and_replace(*identifier, t);
73
74 } else if (l >= 8 &&
75 startswith(p, "MESSAGE=")) {
76 char *t;
77
78 t = memdup_suffix0(p + 8, l - 8);
79 if (t)
80 free_and_replace(*message, t);
81
82 } else if (l > STRLEN("OBJECT_PID=") &&
83 l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
84 startswith(p, "OBJECT_PID=") &&
85 allow_object_pid(ucred)) {
86 char buf[DECIMAL_STR_MAX(pid_t)];
87 memcpy(buf, p + STRLEN("OBJECT_PID="),
88 l - STRLEN("OBJECT_PID="));
89 buf[l-STRLEN("OBJECT_PID=")] = '\0';
90
91 (void) parse_pid(buf, object_pid);
92 }
93 }
94
95 static int manager_process_entry(
96 Manager *m,
97 const void *buffer, size_t *remaining,
98 ClientContext *context,
99 const struct ucred *ucred,
100 const struct timeval *tv,
101 const char *label, size_t label_len) {
102
103 /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
104 * processing should continue, and a negative or positive value otherwise.
105 *
106 * Note that *remaining is altered on both success and failure. */
107
108 size_t n = 0, j, tn = SIZE_MAX, entry_size = 0;
109 char *identifier = NULL, *message = NULL;
110 struct iovec *iovec = NULL;
111 int priority = LOG_INFO;
112 pid_t object_pid = 0;
113 const char *p;
114 int r = 1;
115
116 p = buffer;
117
118 while (*remaining > 0) {
119 const char *e, *q;
120
121 e = memchr(p, '\n', *remaining);
122
123 if (!e) {
124 /* Trailing noise, let's ignore it, and flush what we collected */
125 log_debug("Received message with trailing noise, ignoring.");
126 break; /* finish processing of the message */
127 }
128
129 if (e == p) {
130 /* Entry separator */
131 *remaining -= 1;
132 break;
133 }
134
135 if (IN_SET(*p, '.', '#')) {
136 /* Ignore control commands for now, and comments too. */
137 *remaining -= (e - p) + 1;
138 p = e + 1;
139 continue;
140 }
141
142 /* A property follows */
143 if (n > ENTRY_FIELD_COUNT_MAX) {
144 log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
145 goto finish;
146 }
147
148 /* n existing properties, 1 new, +1 for _TRANSPORT */
149 if (!GREEDY_REALLOC(iovec,
150 n + 2 +
151 N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
152 client_context_extra_fields_n_iovec(context))) {
153 r = log_oom();
154 goto finish;
155 }
156
157 q = memchr(p, '=', e - p);
158 if (q) {
159 if (journal_field_valid(p, q - p, false)) {
160 size_t l;
161
162 l = e - p;
163 if (l > DATA_SIZE_MAX) {
164 log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
165 goto finish;
166 }
167
168 if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
169 log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
170 entry_size + l, n + 1);
171 goto finish;
172 }
173
174 /* If the field name starts with an underscore, skip the variable, since that indicates
175 * a trusted field */
176 iovec[n++] = IOVEC_MAKE((char*) p, l);
177 entry_size += l;
178
179 manager_process_entry_meta(p, l, ucred,
180 &priority,
181 &identifier,
182 &message,
183 &object_pid);
184 }
185
186 *remaining -= (e - p) + 1;
187 p = e + 1;
188 continue;
189 } else {
190 uint64_t l, total;
191 char *k;
192
193 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
194 log_debug("Failed to parse message, ignoring.");
195 break;
196 }
197
198 l = unaligned_read_le64(e + 1);
199 if (l > DATA_SIZE_MAX) {
200 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
201 goto finish;
202 }
203
204 total = (e - p) + 1 + l;
205 if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
206 log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.",
207 entry_size + total, n + 1);
208 goto finish;
209 }
210
211 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
212 e[1+sizeof(uint64_t)+l] != '\n') {
213 log_debug("Failed to parse message, ignoring.");
214 break;
215 }
216
217 k = malloc(total);
218 if (!k) {
219 log_oom();
220 break;
221 }
222
223 memcpy(k, p, e - p);
224 k[e - p] = '=';
225 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
226
227 if (journal_field_valid(p, e - p, false)) {
228 iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l);
229 entry_size += iovec[n].iov_len;
230 n++;
231
232 manager_process_entry_meta(k, (e - p) + 1 + l, ucred,
233 &priority,
234 &identifier,
235 &message,
236 &object_pid);
237 } else
238 free(k);
239
240 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
241 p = e + 1 + sizeof(uint64_t) + l + 1;
242 }
243 }
244
245 if (n <= 0)
246 goto finish;
247
248 tn = n++;
249 iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
250 entry_size += STRLEN("_TRANSPORT=journal");
251
252 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
253 log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
254 goto finish;
255 }
256
257 r = 0; /* Success, we read the message. */
258
259 if (!client_context_test_priority(context, priority))
260 goto finish;
261
262 if (message) {
263 /* Ensure message is not NULL, otherwise strlen(message) would crash. This check needs to
264 * be here until manager_process_entry() is able to process messages containing \0 characters,
265 * as we would have access to the actual size of message. */
266 r = client_context_check_keep_log(context, message, strlen(message));
267 if (r <= 0)
268 goto finish;
269
270 if (m->forward_to_syslog)
271 manager_forward_syslog(m, syslog_fixup_facility(priority), identifier, message, ucred, tv);
272
273 if (m->forward_to_kmsg)
274 manager_forward_kmsg(m, priority, identifier, message, ucred);
275
276 if (m->forward_to_console)
277 manager_forward_console(m, priority, identifier, message, ucred);
278
279 if (m->forward_to_wall)
280 manager_forward_wall(m, priority, identifier, message, ucred);
281 }
282
283 manager_dispatch_message(m, iovec, n, MALLOC_ELEMENTSOF(iovec), context, tv, priority, object_pid);
284
285 finish:
286 for (j = 0; j < n; j++) {
287 if (j == tn)
288 continue;
289
290 if (iovec[j].iov_base < buffer ||
291 (const char*) iovec[j].iov_base >= p + *remaining)
292 free(iovec[j].iov_base);
293 }
294
295 free(iovec);
296 free(identifier);
297 free(message);
298
299 return r;
300 }
301
302 void manager_process_native_message(
303 Manager *m,
304 const char *buffer, size_t buffer_size,
305 const struct ucred *ucred,
306 const struct timeval *tv,
307 const char *label, size_t label_len) {
308
309 size_t remaining = buffer_size;
310 ClientContext *context = NULL;
311 int r;
312
313 assert(m);
314 assert(buffer || buffer_size == 0);
315
316 if (ucred && pid_is_valid(ucred->pid)) {
317 r = client_context_get(m, ucred->pid, ucred, label, label_len, NULL, &context);
318 if (r < 0)
319 log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT,
320 "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m",
321 ucred->pid);
322 }
323
324 do {
325 r = manager_process_entry(m,
326 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
327 context, ucred, tv, label, label_len);
328 } while (r == 0);
329 }
330
331 int manager_process_native_file(
332 Manager *m,
333 int fd,
334 const struct ucred *ucred,
335 const struct timeval *tv,
336 const char *label, size_t label_len) {
337
338 struct stat st;
339 bool sealed;
340 int r;
341
342 /* Data is in the passed fd, probably it didn't fit in a datagram. */
343
344 assert(m);
345 assert(fd >= 0);
346
347 if (fstat(fd, &st) < 0)
348 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
349 "Failed to stat passed file: %m");
350
351 r = stat_verify_regular(&st);
352 if (r < 0)
353 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
354 "File passed is not regular, ignoring message: %m");
355
356 if (st.st_size <= 0)
357 return 0;
358
359 r = fd_verify_safe_flags(fd);
360 if (r == -EREMOTEIO)
361 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
362 "Unexpected flags of passed memory fd, ignoring message.");
363 if (r < 0)
364 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
365 "Failed to get flags of passed file: %m");
366
367 /* If it's a memfd, check if it is sealed. If so, we can just mmap it and use it, and do not need to
368 * copy the data out. */
369 sealed = memfd_get_sealed(fd) > 0;
370
371 if (!sealed && (!ucred || ucred->uid != 0)) {
372 _cleanup_free_ char *k = NULL;
373 const char *e;
374
375 /* If this is not a sealed memfd, and the peer is unknown or unprivileged, then verify the
376 * path. */
377
378 r = fd_get_path(fd, &k);
379 if (r < 0)
380 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
381 "Failed to get path of passed fd: %m");
382
383 e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/");
384 if (!e)
385 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
386 "Received file outside of allowed directories, refusing.");
387
388 if (!filename_is_valid(e))
389 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
390 "Received file in subdirectory of allowed directories, refusing.");
391 }
392
393 /* When !sealed, set a lower memory limit. We have to read the file, effectively doubling memory
394 * use. */
395 if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2))
396 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EFBIG), JOURNAL_LOG_RATELIMIT,
397 "File passed too large (%"PRIu64" bytes), refusing.",
398 (uint64_t) st.st_size);
399
400 if (sealed) {
401 void *p;
402 size_t ps;
403
404 /* The file is sealed, we can just map it and use it. */
405
406 ps = PAGE_ALIGN(st.st_size);
407 assert(ps < SIZE_MAX);
408 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
409 if (p == MAP_FAILED)
410 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
411 "Failed to map memfd: %m");
412
413 manager_process_native_message(m, p, st.st_size, ucred, tv, label, label_len);
414 assert_se(munmap(p, ps) >= 0);
415
416 return 0;
417 }
418
419 _cleanup_free_ void *p = NULL;
420 struct statvfs vfs;
421 ssize_t n;
422
423 if (fstatvfs(fd, &vfs) < 0)
424 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
425 "Failed to stat file system of passed file: %m");
426
427 /* Refuse operating on file systems that have mandatory locking enabled.
428 * See also: https://github.com/systemd/systemd/issues/1822 */
429 if (FLAGS_SET(vfs.f_flag, ST_MANDLOCK))
430 return log_ratelimit_error_errno(SYNTHETIC_ERRNO(EPERM), JOURNAL_LOG_RATELIMIT,
431 "Received file descriptor from file system with mandatory locking enabled, not processing it.");
432
433 /* Make the fd non-blocking. On regular files this has the effect of bypassing mandatory
434 * locking. Of course, this should normally not be necessary given the check above, but let's
435 * better be safe than sorry, after all NFS is pretty confusing regarding file system flags,
436 * and we better don't trust it, and so is SMB. */
437 r = fd_nonblock(fd, true);
438 if (r < 0)
439 return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT,
440 "Failed to make fd non-blocking: %m");
441
442 /* The file is not sealed, we can't map the file here, since clients might then truncate it
443 * and trigger a SIGBUS for us. So let's stupidly read it. */
444
445 p = malloc(st.st_size);
446 if (!p)
447 return log_oom();
448
449 n = pread(fd, p, st.st_size, 0);
450 if (n < 0)
451 return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT,
452 "Failed to read file: %m");
453 if (n > 0)
454 manager_process_native_message(m, p, n, ucred, tv, label, label_len);
455
456 return 0;
457 }
458
459 int manager_open_native_socket(Manager *m, const char *native_socket) {
460 int r;
461
462 assert(m);
463 assert(native_socket);
464
465 if (m->native_fd < 0) {
466 union sockaddr_union sa;
467 size_t sa_len;
468
469 r = sockaddr_un_set_path(&sa.un, native_socket);
470 if (r < 0)
471 return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", native_socket);
472 sa_len = r;
473
474 m->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
475 if (m->native_fd < 0)
476 return log_error_errno(errno, "socket() failed: %m");
477
478 (void) sockaddr_un_unlink(&sa.un);
479
480 r = bind(m->native_fd, &sa.sa, sa_len);
481 if (r < 0)
482 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
483
484 (void) chmod(sa.un.sun_path, 0666);
485 } else
486 (void) fd_nonblock(m->native_fd, true);
487
488 r = setsockopt_int(m->native_fd, SOL_SOCKET, SO_PASSCRED, true);
489 if (r < 0)
490 return log_error_errno(r, "SO_PASSCRED failed: %m");
491
492 if (mac_selinux_use()) {
493 r = setsockopt_int(m->native_fd, SOL_SOCKET, SO_PASSSEC, true);
494 if (r < 0)
495 log_full_errno(ERRNO_IS_NEG_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r, "SO_PASSSEC failed, ignoring: %m");
496 }
497
498 r = setsockopt_int(m->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
499 if (r < 0)
500 return log_error_errno(r, "SO_TIMESTAMP failed: %m");
501
502 r = sd_event_add_io(m->event, &m->native_event_source, m->native_fd, EPOLLIN, manager_process_datagram, m);
503 if (r < 0)
504 return log_error_errno(r, "Failed to add native manager fd to event loop: %m");
505
506 r = sd_event_source_set_priority(m->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
507 if (r < 0)
508 return log_error_errno(r, "Failed to adjust native event source priority: %m");
509
510 return 0;
511 }