]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-native.c
journald: lower the maximum entry size limit to ½ for non-sealed fds
[thirdparty/systemd.git] / src / journal / journald-native.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <stddef.h>
4 #include <sys/epoll.h>
5 #include <sys/mman.h>
6 #include <sys/statvfs.h>
7 #include <unistd.h>
8
9 #include "alloc-util.h"
10 #include "fd-util.h"
11 #include "fs-util.h"
12 #include "io-util.h"
13 #include "journal-importer.h"
14 #include "journal-util.h"
15 #include "journald-console.h"
16 #include "journald-kmsg.h"
17 #include "journald-native.h"
18 #include "journald-server.h"
19 #include "journald-syslog.h"
20 #include "journald-wall.h"
21 #include "memfd-util.h"
22 #include "parse-util.h"
23 #include "path-util.h"
24 #include "process-util.h"
25 #include "selinux-util.h"
26 #include "socket-util.h"
27 #include "string-util.h"
28 #include "strv.h"
29 #include "unaligned.h"
30
/* Tells whether the sender described by ucred may set OBJECT_PID= on its entries: this is
 * the case only when credentials are available and the sender's UID is 0. */
static bool allow_object_pid(const struct ucred *ucred) {
        if (!ucred)
                return false;

        return ucred->uid == 0;
}
34
35 static void server_process_entry_meta(
36 const char *p, size_t l,
37 const struct ucred *ucred,
38 int *priority,
39 char **identifier,
40 char **message,
41 pid_t *object_pid) {
42
43 /* We need to determine the priority of this entry for the rate limiting logic */
44
45 if (l == 10 &&
46 startswith(p, "PRIORITY=") &&
47 p[9] >= '0' && p[9] <= '9')
48 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
49
50 else if (l == 17 &&
51 startswith(p, "SYSLOG_FACILITY=") &&
52 p[16] >= '0' && p[16] <= '9')
53 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
54
55 else if (l == 18 &&
56 startswith(p, "SYSLOG_FACILITY=") &&
57 p[16] >= '0' && p[16] <= '9' &&
58 p[17] >= '0' && p[17] <= '9')
59 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
60
61 else if (l >= 19 &&
62 startswith(p, "SYSLOG_IDENTIFIER=")) {
63 char *t;
64
65 t = strndup(p + 18, l - 18);
66 if (t) {
67 free(*identifier);
68 *identifier = t;
69 }
70
71 } else if (l >= 8 &&
72 startswith(p, "MESSAGE=")) {
73 char *t;
74
75 t = strndup(p + 8, l - 8);
76 if (t) {
77 free(*message);
78 *message = t;
79 }
80
81 } else if (l > STRLEN("OBJECT_PID=") &&
82 l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
83 startswith(p, "OBJECT_PID=") &&
84 allow_object_pid(ucred)) {
85 char buf[DECIMAL_STR_MAX(pid_t)];
86 memcpy(buf, p + STRLEN("OBJECT_PID="),
87 l - STRLEN("OBJECT_PID="));
88 buf[l-STRLEN("OBJECT_PID=")] = '\0';
89
90 (void) parse_pid(buf, object_pid);
91 }
92 }
93
94 static int server_process_entry(
95 Server *s,
96 const void *buffer, size_t *remaining,
97 ClientContext *context,
98 const struct ucred *ucred,
99 const struct timeval *tv,
100 const char *label, size_t label_len) {
101
102 /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
103 * processing should continue, and a negative or positive value otherwise.
104 *
105 * Note that *remaining is altered on both success and failure. */
106
107 size_t n = 0, j, tn = (size_t) -1, m = 0, entry_size = 0;
108 char *identifier = NULL, *message = NULL;
109 struct iovec *iovec = NULL;
110 int priority = LOG_INFO;
111 pid_t object_pid = 0;
112 const char *p;
113 int r = 1;
114
115 p = buffer;
116
117 while (*remaining > 0) {
118 const char *e, *q;
119
120 e = memchr(p, '\n', *remaining);
121
122 if (!e) {
123 /* Trailing noise, let's ignore it, and flush what we collected */
124 log_debug("Received message with trailing noise, ignoring.");
125 break; /* finish processing of the message */
126 }
127
128 if (e == p) {
129 /* Entry separator */
130 *remaining -= 1;
131 break;
132 }
133
134 if (IN_SET(*p, '.', '#')) {
135 /* Ignore control commands for now, and comments too. */
136 *remaining -= (e - p) + 1;
137 p = e + 1;
138 continue;
139 }
140
141 /* A property follows */
142 if (n > ENTRY_FIELD_COUNT_MAX) {
143 log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
144 goto finish;
145 }
146
147 /* n existing properties, 1 new, +1 for _TRANSPORT */
148 if (!GREEDY_REALLOC(iovec, m,
149 n + 2 +
150 N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
151 client_context_extra_fields_n_iovec(context))) {
152 r = log_oom();
153 goto finish;
154 }
155
156 q = memchr(p, '=', e - p);
157 if (q) {
158 if (journal_field_valid(p, q - p, false)) {
159 size_t l;
160
161 l = e - p;
162 if (l > DATA_SIZE_MAX) {
163 log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
164 goto finish;
165 }
166
167 if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
168 log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
169 entry_size + l, n + 1);
170 goto finish;
171 }
172
173 /* If the field name starts with an underscore, skip the variable, since that indicates
174 * a trusted field */
175 iovec[n++] = IOVEC_MAKE((char*) p, l);
176 entry_size += l;
177
178 server_process_entry_meta(p, l, ucred,
179 &priority,
180 &identifier,
181 &message,
182 &object_pid);
183 }
184
185 *remaining -= (e - p) + 1;
186 p = e + 1;
187 continue;
188 } else {
189 uint64_t l, total;
190 char *k;
191
192 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
193 log_debug("Failed to parse message, ignoring.");
194 break;
195 }
196
197 l = unaligned_read_le64(e + 1);
198 if (l > DATA_SIZE_MAX) {
199 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
200 goto finish;
201 }
202
203 total = (e - p) + 1 + l;
204 if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
205 log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.",
206 entry_size + total, n + 1);
207 goto finish;
208 }
209
210 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
211 e[1+sizeof(uint64_t)+l] != '\n') {
212 log_debug("Failed to parse message, ignoring.");
213 break;
214 }
215
216 k = malloc(total);
217 if (!k) {
218 log_oom();
219 break;
220 }
221
222 memcpy(k, p, e - p);
223 k[e - p] = '=';
224 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
225
226 if (journal_field_valid(p, e - p, false)) {
227 iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l);
228 entry_size += iovec[n].iov_len;
229 n++;
230
231 server_process_entry_meta(k, (e - p) + 1 + l, ucred,
232 &priority,
233 &identifier,
234 &message,
235 &object_pid);
236 } else
237 free(k);
238
239 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
240 p = e + 1 + sizeof(uint64_t) + l + 1;
241 }
242 }
243
244 if (n <= 0)
245 goto finish;
246
247 tn = n++;
248 iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
249 entry_size += STRLEN("_TRANSPORT=journal");
250
251 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
252 log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
253 goto finish;
254 }
255
256 r = 0; /* Success, we read the message. */
257
258 if (!client_context_test_priority(context, priority))
259 goto finish;
260
261 if (message) {
262 if (s->forward_to_syslog)
263 server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
264
265 if (s->forward_to_kmsg)
266 server_forward_kmsg(s, priority, identifier, message, ucred);
267
268 if (s->forward_to_console)
269 server_forward_console(s, priority, identifier, message, ucred);
270
271 if (s->forward_to_wall)
272 server_forward_wall(s, priority, identifier, message, ucred);
273 }
274
275 server_dispatch_message(s, iovec, n, m, context, tv, priority, object_pid);
276
277 finish:
278 for (j = 0; j < n; j++) {
279 if (j == tn)
280 continue;
281
282 if (iovec[j].iov_base < buffer ||
283 (const char*) iovec[j].iov_base >= p + *remaining)
284 free(iovec[j].iov_base);
285 }
286
287 free(iovec);
288 free(identifier);
289 free(message);
290
291 return r;
292 }
293
294 void server_process_native_message(
295 Server *s,
296 const char *buffer, size_t buffer_size,
297 const struct ucred *ucred,
298 const struct timeval *tv,
299 const char *label, size_t label_len) {
300
301 size_t remaining = buffer_size;
302 ClientContext *context = NULL;
303 int r;
304
305 assert(s);
306 assert(buffer || buffer_size == 0);
307
308 if (ucred && pid_is_valid(ucred->pid)) {
309 r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
310 if (r < 0)
311 log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
312 }
313
314 do {
315 r = server_process_entry(s,
316 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
317 context, ucred, tv, label, label_len);
318 } while (r == 0);
319 }
320
321 void server_process_native_file(
322 Server *s,
323 int fd,
324 const struct ucred *ucred,
325 const struct timeval *tv,
326 const char *label, size_t label_len) {
327
328 struct stat st;
329 bool sealed;
330 int r;
331
332 /* Data is in the passed fd, probably it didn't fit in a datagram. */
333
334 assert(s);
335 assert(fd >= 0);
336
337 /* If it's a memfd, check if it is sealed. If so, we can just
338 * mmap it and use it, and do not need to copy the data out. */
339 sealed = memfd_get_sealed(fd) > 0;
340
341 if (!sealed && (!ucred || ucred->uid != 0)) {
342 _cleanup_free_ char *k = NULL;
343 const char *e;
344
345 /* If this is not a sealed memfd, and the peer is unknown or
346 * unprivileged, then verify the path. */
347
348 r = fd_get_path(fd, &k);
349 if (r < 0) {
350 log_error_errno(r, "readlink(/proc/self/fd/%i) failed: %m", fd);
351 return;
352 }
353
354 e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/");
355 if (!e) {
356 log_error("Received file outside of allowed directories. Refusing.");
357 return;
358 }
359
360 if (!filename_is_valid(e)) {
361 log_error("Received file in subdirectory of allowed directories. Refusing.");
362 return;
363 }
364 }
365
366 if (fstat(fd, &st) < 0) {
367 log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
368 return;
369 }
370
371 if (!S_ISREG(st.st_mode)) {
372 log_error("File passed is not regular. Ignoring.");
373 return;
374 }
375
376 if (st.st_size <= 0)
377 return;
378
379 /* When !sealed, set a lower memory limit. We have to read the file,
380 * effectively doubling memory use. */
381 if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) {
382 log_error("File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size);
383 return;
384 }
385
386 if (sealed) {
387 void *p;
388 size_t ps;
389
390 /* The file is sealed, we can just map it and use it. */
391
392 ps = PAGE_ALIGN(st.st_size);
393 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
394 if (p == MAP_FAILED) {
395 log_error_errno(errno, "Failed to map memfd, ignoring: %m");
396 return;
397 }
398
399 server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
400 assert_se(munmap(p, ps) >= 0);
401 } else {
402 _cleanup_free_ void *p = NULL;
403 struct statvfs vfs;
404 ssize_t n;
405
406 if (fstatvfs(fd, &vfs) < 0) {
407 log_error_errno(errno, "Failed to stat file system of passed file, not processing it: %m");
408 return;
409 }
410
411 /* Refuse operating on file systems that have
412 * mandatory locking enabled, see:
413 *
414 * https://github.com/systemd/systemd/issues/1822
415 */
416 if (vfs.f_flag & ST_MANDLOCK) {
417 log_error("Received file descriptor from file system with mandatory locking enabled, not processing it.");
418 return;
419 }
420
421 /* Make the fd non-blocking. On regular files this has
422 * the effect of bypassing mandatory locking. Of
423 * course, this should normally not be necessary given
424 * the check above, but let's better be safe than
425 * sorry, after all NFS is pretty confusing regarding
426 * file system flags, and we better don't trust it,
427 * and so is SMB. */
428 r = fd_nonblock(fd, true);
429 if (r < 0) {
430 log_error_errno(r, "Failed to make fd non-blocking, not processing it: %m");
431 return;
432 }
433
434 /* The file is not sealed, we can't map the file here, since
435 * clients might then truncate it and trigger a SIGBUS for
436 * us. So let's stupidly read it. */
437
438 p = malloc(st.st_size);
439 if (!p) {
440 log_oom();
441 return;
442 }
443
444 n = pread(fd, p, st.st_size, 0);
445 if (n < 0)
446 log_error_errno(errno, "Failed to read file, ignoring: %m");
447 else if (n > 0)
448 server_process_native_message(s, p, n, ucred, tv, label, label_len);
449 }
450 }
451
452 int server_open_native_socket(Server *s) {
453
454 static const union sockaddr_union sa = {
455 .un.sun_family = AF_UNIX,
456 .un.sun_path = "/run/systemd/journal/socket",
457 };
458 int r;
459
460 assert(s);
461
462 if (s->native_fd < 0) {
463 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
464 if (s->native_fd < 0)
465 return log_error_errno(errno, "socket() failed: %m");
466
467 (void) sockaddr_un_unlink(&sa.un);
468
469 r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
470 if (r < 0)
471 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
472
473 (void) chmod(sa.un.sun_path, 0666);
474 } else
475 (void) fd_nonblock(s->native_fd, true);
476
477 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true);
478 if (r < 0)
479 return log_error_errno(r, "SO_PASSCRED failed: %m");
480
481 #if HAVE_SELINUX
482 if (mac_selinux_use()) {
483 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true);
484 if (r < 0)
485 log_warning_errno(r, "SO_PASSSEC failed: %m");
486 }
487 #endif
488
489 r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
490 if (r < 0)
491 return log_error_errno(r, "SO_TIMESTAMP failed: %m");
492
493 r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
494 if (r < 0)
495 return log_error_errno(r, "Failed to add native server fd to event loop: %m");
496
497 r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
498 if (r < 0)
499 return log_error_errno(r, "Failed to adjust native event source priority: %m");
500
501 return 0;
502 }