]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-native.c
Merge pull request #5930 from larskarlitski/journal-skip
[thirdparty/systemd.git] / src / journal / journald-native.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <stddef.h>
21 #include <sys/epoll.h>
22 #include <sys/mman.h>
23 #include <sys/statvfs.h>
24 #include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "fd-util.h"
28 #include "fs-util.h"
29 #include "io-util.h"
30 #include "journal-importer.h"
31 #include "journald-console.h"
32 #include "journald-kmsg.h"
33 #include "journald-native.h"
34 #include "journald-server.h"
35 #include "journald-syslog.h"
36 #include "journald-wall.h"
37 #include "memfd-util.h"
38 #include "parse-util.h"
39 #include "path-util.h"
40 #include "selinux-util.h"
41 #include "socket-util.h"
42 #include "string-util.h"
43 #include "unaligned.h"
44
/* Checks whether the first l bytes at p form an acceptable journal field name.
 *
 * The rules loosely follow the POSIX recommendations for environment variable names, with a few extra
 * restrictions:
 *
 *   http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
 *
 * p does not need to be NUL-terminated; exactly l bytes are inspected. If allow_protected is false, names
 * beginning with an underscore are refused. Returns true if the name is acceptable, false otherwise. */
bool valid_user_field(const char *p, size_t l, bool allow_protected) {
        size_t i;

        /* Neither empty names nor names longer than 64 characters are accepted */
        if (l == 0 || l > 64)
                return false;

        /* A leading underscore marks a protected variable */
        if (p[0] == '_' && !allow_protected)
                return false;

        /* The first character may not be a digit */
        if (p[0] >= '0' && p[0] <= '9')
                return false;

        /* Everything must be drawn from A-Z, 0-9 and '_' */
        for (i = 0; i < l; i++) {
                const char c = p[i];
                const bool is_upper = c >= 'A' && c <= 'Z';
                const bool is_digit = c >= '0' && c <= '9';

                if (!is_upper && !is_digit && c != '_')
                        return false;
        }

        return true;
}
79
80 static bool allow_object_pid(const struct ucred *ucred) {
81 return ucred && ucred->uid == 0;
82 }
83
84 static void server_process_entry_meta(
85 const char *p, size_t l,
86 const struct ucred *ucred,
87 int *priority,
88 char **identifier,
89 char **message,
90 pid_t *object_pid) {
91
92 /* We need to determine the priority of this entry for the rate limiting logic */
93
94 if (l == 10 &&
95 startswith(p, "PRIORITY=") &&
96 p[9] >= '0' && p[9] <= '9')
97 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
98
99 else if (l == 17 &&
100 startswith(p, "SYSLOG_FACILITY=") &&
101 p[16] >= '0' && p[16] <= '9')
102 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
103
104 else if (l == 18 &&
105 startswith(p, "SYSLOG_FACILITY=") &&
106 p[16] >= '0' && p[16] <= '9' &&
107 p[17] >= '0' && p[17] <= '9')
108 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
109
110 else if (l >= 19 &&
111 startswith(p, "SYSLOG_IDENTIFIER=")) {
112 char *t;
113
114 t = strndup(p + 18, l - 18);
115 if (t) {
116 free(*identifier);
117 *identifier = t;
118 }
119
120 } else if (l >= 8 &&
121 startswith(p, "MESSAGE=")) {
122 char *t;
123
124 t = strndup(p + 8, l - 8);
125 if (t) {
126 free(*message);
127 *message = t;
128 }
129
130 } else if (l > strlen("OBJECT_PID=") &&
131 l < strlen("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
132 startswith(p, "OBJECT_PID=") &&
133 allow_object_pid(ucred)) {
134 char buf[DECIMAL_STR_MAX(pid_t)];
135 memcpy(buf, p + strlen("OBJECT_PID="), l - strlen("OBJECT_PID="));
136 buf[l-strlen("OBJECT_PID=")] = '\0';
137
138 (void) parse_pid(buf, object_pid);
139 }
140 }
141
142 static int server_process_entry(
143 Server *s,
144 const void *buffer, size_t *remaining,
145 const struct ucred *ucred,
146 const struct timeval *tv,
147 const char *label, size_t label_len) {
148
149 /* Process a single entry from a native message.
150 * Returns 0 if nothing special happened and the message processing should continue,
151 * and a negative or positive value otherwise.
152 *
153 * Note that *remaining is altered on both success and failure. */
154
155 struct iovec *iovec = NULL;
156 unsigned n = 0, j, tn = (unsigned) -1;
157 const char *p;
158 size_t m = 0, entry_size = 0;
159 int priority = LOG_INFO;
160 char *identifier = NULL, *message = NULL;
161 pid_t object_pid = 0;
162 int r = 0;
163
164 p = buffer;
165
166 while (*remaining > 0) {
167 const char *e, *q;
168
169 e = memchr(p, '\n', *remaining);
170
171 if (!e) {
172 /* Trailing noise, let's ignore it, and flush what we collected */
173 log_debug("Received message with trailing noise, ignoring.");
174 r = 1; /* finish processing of the message */
175 break;
176 }
177
178 if (e == p) {
179 /* Entry separator */
180 *remaining -= 1;
181 break;
182 }
183
184 if (*p == '.' || *p == '#') {
185 /* Ignore control commands for now, and
186 * comments too. */
187 *remaining -= (e - p) + 1;
188 p = e + 1;
189 continue;
190 }
191
192 /* A property follows */
193
194 /* n existing properties, 1 new, +1 for _TRANSPORT */
195 if (!GREEDY_REALLOC(iovec, m, n + 2 + N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS)) {
196 r = log_oom();
197 break;
198 }
199
200 q = memchr(p, '=', e - p);
201 if (q) {
202 if (valid_user_field(p, q - p, false)) {
203 size_t l;
204
205 l = e - p;
206
207 /* If the field name starts with an
208 * underscore, skip the variable,
209 * since that indicates a trusted
210 * field */
211 iovec[n].iov_base = (char*) p;
212 iovec[n].iov_len = l;
213 entry_size += l;
214 n++;
215
216 server_process_entry_meta(p, l, ucred,
217 &priority,
218 &identifier,
219 &message,
220 &object_pid);
221 }
222
223 *remaining -= (e - p) + 1;
224 p = e + 1;
225 continue;
226 } else {
227 uint64_t l;
228 char *k;
229
230 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
231 log_debug("Failed to parse message, ignoring.");
232 break;
233 }
234
235 l = unaligned_read_le64(e + 1);
236
237 if (l > DATA_SIZE_MAX) {
238 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
239 break;
240 }
241
242 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
243 e[1+sizeof(uint64_t)+l] != '\n') {
244 log_debug("Failed to parse message, ignoring.");
245 break;
246 }
247
248 k = malloc((e - p) + 1 + l);
249 if (!k) {
250 log_oom();
251 break;
252 }
253
254 memcpy(k, p, e - p);
255 k[e - p] = '=';
256 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
257
258 if (valid_user_field(p, e - p, false)) {
259 iovec[n].iov_base = k;
260 iovec[n].iov_len = (e - p) + 1 + l;
261 entry_size += iovec[n].iov_len;
262 n++;
263
264 server_process_entry_meta(k, (e - p) + 1 + l, ucred,
265 &priority,
266 &identifier,
267 &message,
268 &object_pid);
269 } else
270 free(k);
271
272 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
273 p = e + 1 + sizeof(uint64_t) + l + 1;
274 }
275 }
276
277 if (n <= 0) {
278 r = 1;
279 goto finish;
280 }
281
282 tn = n++;
283 IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
284 entry_size += strlen("_TRANSPORT=journal");
285
286 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
287 log_debug("Entry is too big with %u properties and %zu bytes, ignoring.",
288 n, entry_size);
289 goto finish;
290 }
291
292 if (message) {
293 if (s->forward_to_syslog)
294 server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
295
296 if (s->forward_to_kmsg)
297 server_forward_kmsg(s, priority, identifier, message, ucred);
298
299 if (s->forward_to_console)
300 server_forward_console(s, priority, identifier, message, ucred);
301
302 if (s->forward_to_wall)
303 server_forward_wall(s, priority, identifier, message, ucred);
304 }
305
306 server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
307
308 finish:
309 for (j = 0; j < n; j++) {
310 if (j == tn)
311 continue;
312
313 if (iovec[j].iov_base < buffer ||
314 (const char*) iovec[j].iov_base >= p + *remaining)
315 free(iovec[j].iov_base);
316 }
317
318 free(iovec);
319 free(identifier);
320 free(message);
321
322 return r;
323 }
324
325 void server_process_native_message(
326 Server *s,
327 const void *buffer, size_t buffer_size,
328 const struct ucred *ucred,
329 const struct timeval *tv,
330 const char *label, size_t label_len) {
331
332 int r;
333 size_t remaining = buffer_size;
334
335 assert(s);
336 assert(buffer || buffer_size == 0);
337
338 do {
339 r = server_process_entry(s,
340 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
341 ucred, tv, label, label_len);
342 } while (r == 0);
343 }
344
345 void server_process_native_file(
346 Server *s,
347 int fd,
348 const struct ucred *ucred,
349 const struct timeval *tv,
350 const char *label, size_t label_len) {
351
352 struct stat st;
353 bool sealed;
354 int r;
355
356 /* Data is in the passed fd, since it didn't fit in a
357 * datagram. */
358
359 assert(s);
360 assert(fd >= 0);
361
362 /* If it's a memfd, check if it is sealed. If so, we can just
363 * use map it and use it, and do not need to copy the data
364 * out. */
365 sealed = memfd_get_sealed(fd) > 0;
366
367 if (!sealed && (!ucred || ucred->uid != 0)) {
368 _cleanup_free_ char *sl = NULL, *k = NULL;
369 const char *e;
370
371 /* If this is not a sealed memfd, and the peer is unknown or
372 * unprivileged, then verify the path. */
373
374 if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
375 log_oom();
376 return;
377 }
378
379 r = readlink_malloc(sl, &k);
380 if (r < 0) {
381 log_error_errno(r, "readlink(%s) failed: %m", sl);
382 return;
383 }
384
385 e = path_startswith(k, "/dev/shm/");
386 if (!e)
387 e = path_startswith(k, "/tmp/");
388 if (!e)
389 e = path_startswith(k, "/var/tmp/");
390 if (!e) {
391 log_error("Received file outside of allowed directories. Refusing.");
392 return;
393 }
394
395 if (!filename_is_valid(e)) {
396 log_error("Received file in subdirectory of allowed directories. Refusing.");
397 return;
398 }
399 }
400
401 if (fstat(fd, &st) < 0) {
402 log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
403 return;
404 }
405
406 if (!S_ISREG(st.st_mode)) {
407 log_error("File passed is not regular. Ignoring.");
408 return;
409 }
410
411 if (st.st_size <= 0)
412 return;
413
414 if (st.st_size > ENTRY_SIZE_MAX) {
415 log_error("File passed too large. Ignoring.");
416 return;
417 }
418
419 if (sealed) {
420 void *p;
421 size_t ps;
422
423 /* The file is sealed, we can just map it and use it. */
424
425 ps = PAGE_ALIGN(st.st_size);
426 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
427 if (p == MAP_FAILED) {
428 log_error_errno(errno, "Failed to map memfd, ignoring: %m");
429 return;
430 }
431
432 server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
433 assert_se(munmap(p, ps) >= 0);
434 } else {
435 _cleanup_free_ void *p = NULL;
436 struct statvfs vfs;
437 ssize_t n;
438
439 if (fstatvfs(fd, &vfs) < 0) {
440 log_error_errno(errno, "Failed to stat file system of passed file, ignoring: %m");
441 return;
442 }
443
444 /* Refuse operating on file systems that have
445 * mandatory locking enabled, see:
446 *
447 * https://github.com/systemd/systemd/issues/1822
448 */
449 if (vfs.f_flag & ST_MANDLOCK) {
450 log_error("Received file descriptor from file system with mandatory locking enable, refusing.");
451 return;
452 }
453
454 /* Make the fd non-blocking. On regular files this has
455 * the effect of bypassing mandatory locking. Of
456 * course, this should normally not be necessary given
457 * the check above, but let's better be safe than
458 * sorry, after all NFS is pretty confusing regarding
459 * file system flags, and we better don't trust it,
460 * and so is SMB. */
461 r = fd_nonblock(fd, true);
462 if (r < 0) {
463 log_error_errno(r, "Failed to make fd non-blocking, ignoring: %m");
464 return;
465 }
466
467 /* The file is not sealed, we can't map the file here, since
468 * clients might then truncate it and trigger a SIGBUS for
469 * us. So let's stupidly read it */
470
471 p = malloc(st.st_size);
472 if (!p) {
473 log_oom();
474 return;
475 }
476
477 n = pread(fd, p, st.st_size, 0);
478 if (n < 0)
479 log_error_errno(errno, "Failed to read file, ignoring: %m");
480 else if (n > 0)
481 server_process_native_message(s, p, n, ucred, tv, label, label_len);
482 }
483 }
484
485 int server_open_native_socket(Server*s) {
486
487 static const union sockaddr_union sa = {
488 .un.sun_family = AF_UNIX,
489 .un.sun_path = "/run/systemd/journal/socket",
490 };
491 static const int one = 1;
492 int r;
493
494 assert(s);
495
496 if (s->native_fd < 0) {
497 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
498 if (s->native_fd < 0)
499 return log_error_errno(errno, "socket() failed: %m");
500
501 (void) unlink(sa.un.sun_path);
502
503 r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
504 if (r < 0)
505 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
506
507 (void) chmod(sa.un.sun_path, 0666);
508 } else
509 fd_nonblock(s->native_fd, 1);
510
511 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
512 if (r < 0)
513 return log_error_errno(errno, "SO_PASSCRED failed: %m");
514
515 #ifdef HAVE_SELINUX
516 if (mac_selinux_use()) {
517 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
518 if (r < 0)
519 log_warning_errno(errno, "SO_PASSSEC failed: %m");
520 }
521 #endif
522
523 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
524 if (r < 0)
525 return log_error_errno(errno, "SO_TIMESTAMP failed: %m");
526
527 r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
528 if (r < 0)
529 return log_error_errno(r, "Failed to add native server fd to event loop: %m");
530
531 r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
532 if (r < 0)
533 return log_error_errno(r, "Failed to adjust native event source priority: %m");
534
535 return 0;
536 }