]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-native.c
core/namespace: rework the return semantics of clone_device_node yet again
[thirdparty/systemd.git] / src / journal / journald-native.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <stddef.h>
22 #include <sys/epoll.h>
23 #include <sys/mman.h>
24 #include <sys/statvfs.h>
25 #include <unistd.h>
26
27 #include "alloc-util.h"
28 #include "fd-util.h"
29 #include "fs-util.h"
30 #include "io-util.h"
31 #include "journal-importer.h"
32 #include "journal-util.h"
33 #include "journald-console.h"
34 #include "journald-kmsg.h"
35 #include "journald-native.h"
36 #include "journald-server.h"
37 #include "journald-syslog.h"
38 #include "journald-wall.h"
39 #include "memfd-util.h"
40 #include "parse-util.h"
41 #include "path-util.h"
42 #include "process-util.h"
43 #include "selinux-util.h"
44 #include "socket-util.h"
45 #include "string-util.h"
46 #include "unaligned.h"
47
/* Decides whether a sender may attach OBJECT_PID= to its entries: this is only the case when
 * credentials are available for it and they carry UID 0. */
static bool allow_object_pid(const struct ucred *ucred) {
        if (!ucred)
                return false;

        return ucred->uid == 0;
}
51
52 static void server_process_entry_meta(
53 const char *p, size_t l,
54 const struct ucred *ucred,
55 int *priority,
56 char **identifier,
57 char **message,
58 pid_t *object_pid) {
59
60 /* We need to determine the priority of this entry for the rate limiting logic */
61
62 if (l == 10 &&
63 startswith(p, "PRIORITY=") &&
64 p[9] >= '0' && p[9] <= '9')
65 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
66
67 else if (l == 17 &&
68 startswith(p, "SYSLOG_FACILITY=") &&
69 p[16] >= '0' && p[16] <= '9')
70 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
71
72 else if (l == 18 &&
73 startswith(p, "SYSLOG_FACILITY=") &&
74 p[16] >= '0' && p[16] <= '9' &&
75 p[17] >= '0' && p[17] <= '9')
76 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
77
78 else if (l >= 19 &&
79 startswith(p, "SYSLOG_IDENTIFIER=")) {
80 char *t;
81
82 t = strndup(p + 18, l - 18);
83 if (t) {
84 free(*identifier);
85 *identifier = t;
86 }
87
88 } else if (l >= 8 &&
89 startswith(p, "MESSAGE=")) {
90 char *t;
91
92 t = strndup(p + 8, l - 8);
93 if (t) {
94 free(*message);
95 *message = t;
96 }
97
98 } else if (l > STRLEN("OBJECT_PID=") &&
99 l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
100 startswith(p, "OBJECT_PID=") &&
101 allow_object_pid(ucred)) {
102 char buf[DECIMAL_STR_MAX(pid_t)];
103 memcpy(buf, p + STRLEN("OBJECT_PID="),
104 l - STRLEN("OBJECT_PID="));
105 buf[l-STRLEN("OBJECT_PID=")] = '\0';
106
107 (void) parse_pid(buf, object_pid);
108 }
109 }
110
111 static int server_process_entry(
112 Server *s,
113 const void *buffer, size_t *remaining,
114 ClientContext *context,
115 const struct ucred *ucred,
116 const struct timeval *tv,
117 const char *label, size_t label_len) {
118
119 /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
120 * processing should continue, and a negative or positive value otherwise.
121 *
122 * Note that *remaining is altered on both success and failure. */
123
124 size_t n = 0, j, tn = (size_t) -1, m = 0, entry_size = 0;
125 char *identifier = NULL, *message = NULL;
126 struct iovec *iovec = NULL;
127 int priority = LOG_INFO;
128 pid_t object_pid = 0;
129 const char *p;
130 int r = 0;
131
132 p = buffer;
133
134 while (*remaining > 0) {
135 const char *e, *q;
136
137 e = memchr(p, '\n', *remaining);
138
139 if (!e) {
140 /* Trailing noise, let's ignore it, and flush what we collected */
141 log_debug("Received message with trailing noise, ignoring.");
142 r = 1; /* finish processing of the message */
143 break;
144 }
145
146 if (e == p) {
147 /* Entry separator */
148 *remaining -= 1;
149 break;
150 }
151
152 if (IN_SET(*p, '.', '#')) {
153 /* Ignore control commands for now, and
154 * comments too. */
155 *remaining -= (e - p) + 1;
156 p = e + 1;
157 continue;
158 }
159
160 /* A property follows */
161
162 /* n existing properties, 1 new, +1 for _TRANSPORT */
163 if (!GREEDY_REALLOC(iovec, m,
164 n + 2 +
165 N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
166 client_context_extra_fields_n_iovec(context))) {
167 r = log_oom();
168 break;
169 }
170
171 q = memchr(p, '=', e - p);
172 if (q) {
173 if (journal_field_valid(p, q - p, false)) {
174 size_t l;
175
176 l = e - p;
177
178 /* If the field name starts with an underscore, skip the variable, since that indicates
179 * a trusted field */
180 iovec[n++] = IOVEC_MAKE((char*) p, l);
181 entry_size += l;
182
183 server_process_entry_meta(p, l, ucred,
184 &priority,
185 &identifier,
186 &message,
187 &object_pid);
188 }
189
190 *remaining -= (e - p) + 1;
191 p = e + 1;
192 continue;
193 } else {
194 uint64_t l;
195 char *k;
196
197 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
198 log_debug("Failed to parse message, ignoring.");
199 break;
200 }
201
202 l = unaligned_read_le64(e + 1);
203
204 if (l > DATA_SIZE_MAX) {
205 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
206 break;
207 }
208
209 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
210 e[1+sizeof(uint64_t)+l] != '\n') {
211 log_debug("Failed to parse message, ignoring.");
212 break;
213 }
214
215 k = malloc((e - p) + 1 + l);
216 if (!k) {
217 log_oom();
218 break;
219 }
220
221 memcpy(k, p, e - p);
222 k[e - p] = '=';
223 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
224
225 if (journal_field_valid(p, e - p, false)) {
226 iovec[n].iov_base = k;
227 iovec[n].iov_len = (e - p) + 1 + l;
228 entry_size += iovec[n].iov_len;
229 n++;
230
231 server_process_entry_meta(k, (e - p) + 1 + l, ucred,
232 &priority,
233 &identifier,
234 &message,
235 &object_pid);
236 } else
237 free(k);
238
239 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
240 p = e + 1 + sizeof(uint64_t) + l + 1;
241 }
242 }
243
244 if (n <= 0) {
245 r = 1;
246 goto finish;
247 }
248
249 if (!client_context_test_priority(context, priority)) {
250 r = 0;
251 goto finish;
252 }
253
254 tn = n++;
255 iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
256 entry_size += STRLEN("_TRANSPORT=journal");
257
258 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
259 log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
260 goto finish;
261 }
262
263 if (message) {
264 if (s->forward_to_syslog)
265 server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
266
267 if (s->forward_to_kmsg)
268 server_forward_kmsg(s, priority, identifier, message, ucred);
269
270 if (s->forward_to_console)
271 server_forward_console(s, priority, identifier, message, ucred);
272
273 if (s->forward_to_wall)
274 server_forward_wall(s, priority, identifier, message, ucred);
275 }
276
277 server_dispatch_message(s, iovec, n, m, context, tv, priority, object_pid);
278
279 finish:
280 for (j = 0; j < n; j++) {
281 if (j == tn)
282 continue;
283
284 if (iovec[j].iov_base < buffer ||
285 (const char*) iovec[j].iov_base >= p + *remaining)
286 free(iovec[j].iov_base);
287 }
288
289 free(iovec);
290 free(identifier);
291 free(message);
292
293 return r;
294 }
295
296 void server_process_native_message(
297 Server *s,
298 const void *buffer, size_t buffer_size,
299 const struct ucred *ucred,
300 const struct timeval *tv,
301 const char *label, size_t label_len) {
302
303 size_t remaining = buffer_size;
304 ClientContext *context = NULL;
305 int r;
306
307 assert(s);
308 assert(buffer || buffer_size == 0);
309
310 if (ucred && pid_is_valid(ucred->pid)) {
311 r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
312 if (r < 0)
313 log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
314 }
315
316 do {
317 r = server_process_entry(s,
318 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
319 context, ucred, tv, label, label_len);
320 } while (r == 0);
321 }
322
323 void server_process_native_file(
324 Server *s,
325 int fd,
326 const struct ucred *ucred,
327 const struct timeval *tv,
328 const char *label, size_t label_len) {
329
330 struct stat st;
331 bool sealed;
332 int r;
333
334 /* Data is in the passed fd, since it didn't fit in a
335 * datagram. */
336
337 assert(s);
338 assert(fd >= 0);
339
340 /* If it's a memfd, check if it is sealed. If so, we can just
341 * use map it and use it, and do not need to copy the data
342 * out. */
343 sealed = memfd_get_sealed(fd) > 0;
344
345 if (!sealed && (!ucred || ucred->uid != 0)) {
346 _cleanup_free_ char *sl = NULL, *k = NULL;
347 const char *e;
348
349 /* If this is not a sealed memfd, and the peer is unknown or
350 * unprivileged, then verify the path. */
351
352 if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
353 log_oom();
354 return;
355 }
356
357 r = readlink_malloc(sl, &k);
358 if (r < 0) {
359 log_error_errno(r, "readlink(%s) failed: %m", sl);
360 return;
361 }
362
363 e = path_startswith(k, "/dev/shm/");
364 if (!e)
365 e = path_startswith(k, "/tmp/");
366 if (!e)
367 e = path_startswith(k, "/var/tmp/");
368 if (!e) {
369 log_error("Received file outside of allowed directories. Refusing.");
370 return;
371 }
372
373 if (!filename_is_valid(e)) {
374 log_error("Received file in subdirectory of allowed directories. Refusing.");
375 return;
376 }
377 }
378
379 if (fstat(fd, &st) < 0) {
380 log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
381 return;
382 }
383
384 if (!S_ISREG(st.st_mode)) {
385 log_error("File passed is not regular. Ignoring.");
386 return;
387 }
388
389 if (st.st_size <= 0)
390 return;
391
392 if (st.st_size > ENTRY_SIZE_MAX) {
393 log_error("File passed too large. Ignoring.");
394 return;
395 }
396
397 if (sealed) {
398 void *p;
399 size_t ps;
400
401 /* The file is sealed, we can just map it and use it. */
402
403 ps = PAGE_ALIGN(st.st_size);
404 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
405 if (p == MAP_FAILED) {
406 log_error_errno(errno, "Failed to map memfd, ignoring: %m");
407 return;
408 }
409
410 server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
411 assert_se(munmap(p, ps) >= 0);
412 } else {
413 _cleanup_free_ void *p = NULL;
414 struct statvfs vfs;
415 ssize_t n;
416
417 if (fstatvfs(fd, &vfs) < 0) {
418 log_error_errno(errno, "Failed to stat file system of passed file, ignoring: %m");
419 return;
420 }
421
422 /* Refuse operating on file systems that have
423 * mandatory locking enabled, see:
424 *
425 * https://github.com/systemd/systemd/issues/1822
426 */
427 if (vfs.f_flag & ST_MANDLOCK) {
428 log_error("Received file descriptor from file system with mandatory locking enabled, refusing.");
429 return;
430 }
431
432 /* Make the fd non-blocking. On regular files this has
433 * the effect of bypassing mandatory locking. Of
434 * course, this should normally not be necessary given
435 * the check above, but let's better be safe than
436 * sorry, after all NFS is pretty confusing regarding
437 * file system flags, and we better don't trust it,
438 * and so is SMB. */
439 r = fd_nonblock(fd, true);
440 if (r < 0) {
441 log_error_errno(r, "Failed to make fd non-blocking, ignoring: %m");
442 return;
443 }
444
445 /* The file is not sealed, we can't map the file here, since
446 * clients might then truncate it and trigger a SIGBUS for
447 * us. So let's stupidly read it */
448
449 p = malloc(st.st_size);
450 if (!p) {
451 log_oom();
452 return;
453 }
454
455 n = pread(fd, p, st.st_size, 0);
456 if (n < 0)
457 log_error_errno(errno, "Failed to read file, ignoring: %m");
458 else if (n > 0)
459 server_process_native_message(s, p, n, ucred, tv, label, label_len);
460 }
461 }
462
463 int server_open_native_socket(Server*s) {
464
465 static const union sockaddr_union sa = {
466 .un.sun_family = AF_UNIX,
467 .un.sun_path = "/run/systemd/journal/socket",
468 };
469 static const int one = 1;
470 int r;
471
472 assert(s);
473
474 if (s->native_fd < 0) {
475 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
476 if (s->native_fd < 0)
477 return log_error_errno(errno, "socket() failed: %m");
478
479 (void) unlink(sa.un.sun_path);
480
481 r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
482 if (r < 0)
483 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
484
485 (void) chmod(sa.un.sun_path, 0666);
486 } else
487 fd_nonblock(s->native_fd, 1);
488
489 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
490 if (r < 0)
491 return log_error_errno(errno, "SO_PASSCRED failed: %m");
492
493 #if HAVE_SELINUX
494 if (mac_selinux_use()) {
495 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
496 if (r < 0)
497 log_warning_errno(errno, "SO_PASSSEC failed: %m");
498 }
499 #endif
500
501 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
502 if (r < 0)
503 return log_error_errno(errno, "SO_TIMESTAMP failed: %m");
504
505 r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
506 if (r < 0)
507 return log_error_errno(r, "Failed to add native server fd to event loop: %m");
508
509 r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
510 if (r < 0)
511 return log_error_errno(r, "Failed to adjust native event source priority: %m");
512
513 return 0;
514 }