]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-native.c
core: implement /run/systemd/units/-based path for passing unit info from PID 1 to...
[thirdparty/systemd.git] / src / journal / journald-native.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <stddef.h>
21 #include <sys/epoll.h>
22 #include <sys/mman.h>
23 #include <sys/statvfs.h>
24 #include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "fd-util.h"
28 #include "fs-util.h"
29 #include "io-util.h"
30 #include "journal-importer.h"
31 #include "journal-util.h"
32 #include "journald-console.h"
33 #include "journald-kmsg.h"
34 #include "journald-native.h"
35 #include "journald-server.h"
36 #include "journald-syslog.h"
37 #include "journald-wall.h"
38 #include "memfd-util.h"
39 #include "parse-util.h"
40 #include "path-util.h"
41 #include "process-util.h"
42 #include "selinux-util.h"
43 #include "socket-util.h"
44 #include "string-util.h"
45 #include "unaligned.h"
46
47 static bool allow_object_pid(const struct ucred *ucred) {
48 return ucred && ucred->uid == 0;
49 }
50
51 static void server_process_entry_meta(
52 const char *p, size_t l,
53 const struct ucred *ucred,
54 int *priority,
55 char **identifier,
56 char **message,
57 pid_t *object_pid) {
58
59 /* We need to determine the priority of this entry for the rate limiting logic */
60
61 if (l == 10 &&
62 startswith(p, "PRIORITY=") &&
63 p[9] >= '0' && p[9] <= '9')
64 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
65
66 else if (l == 17 &&
67 startswith(p, "SYSLOG_FACILITY=") &&
68 p[16] >= '0' && p[16] <= '9')
69 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
70
71 else if (l == 18 &&
72 startswith(p, "SYSLOG_FACILITY=") &&
73 p[16] >= '0' && p[16] <= '9' &&
74 p[17] >= '0' && p[17] <= '9')
75 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
76
77 else if (l >= 19 &&
78 startswith(p, "SYSLOG_IDENTIFIER=")) {
79 char *t;
80
81 t = strndup(p + 18, l - 18);
82 if (t) {
83 free(*identifier);
84 *identifier = t;
85 }
86
87 } else if (l >= 8 &&
88 startswith(p, "MESSAGE=")) {
89 char *t;
90
91 t = strndup(p + 8, l - 8);
92 if (t) {
93 free(*message);
94 *message = t;
95 }
96
97 } else if (l > strlen("OBJECT_PID=") &&
98 l < strlen("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
99 startswith(p, "OBJECT_PID=") &&
100 allow_object_pid(ucred)) {
101 char buf[DECIMAL_STR_MAX(pid_t)];
102 memcpy(buf, p + strlen("OBJECT_PID="), l - strlen("OBJECT_PID="));
103 buf[l-strlen("OBJECT_PID=")] = '\0';
104
105 (void) parse_pid(buf, object_pid);
106 }
107 }
108
109 static int server_process_entry(
110 Server *s,
111 const void *buffer, size_t *remaining,
112 ClientContext *context,
113 const struct ucred *ucred,
114 const struct timeval *tv,
115 const char *label, size_t label_len) {
116
117 /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
118 * processing should continue, and a negative or positive value otherwise.
119 *
120 * Note that *remaining is altered on both success and failure. */
121
122 size_t n = 0, j, tn = (size_t) -1, m = 0, entry_size = 0;
123 char *identifier = NULL, *message = NULL;
124 struct iovec *iovec = NULL;
125 int priority = LOG_INFO;
126 pid_t object_pid = 0;
127 const char *p;
128 int r = 0;
129
130 p = buffer;
131
132 while (*remaining > 0) {
133 const char *e, *q;
134
135 e = memchr(p, '\n', *remaining);
136
137 if (!e) {
138 /* Trailing noise, let's ignore it, and flush what we collected */
139 log_debug("Received message with trailing noise, ignoring.");
140 r = 1; /* finish processing of the message */
141 break;
142 }
143
144 if (e == p) {
145 /* Entry separator */
146 *remaining -= 1;
147 break;
148 }
149
150 if (IN_SET(*p, '.', '#')) {
151 /* Ignore control commands for now, and
152 * comments too. */
153 *remaining -= (e - p) + 1;
154 p = e + 1;
155 continue;
156 }
157
158 /* A property follows */
159
160 /* n existing properties, 1 new, +1 for _TRANSPORT */
161 if (!GREEDY_REALLOC(iovec, m,
162 n + 2 +
163 N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
164 client_context_extra_fields_n_iovec(context))) {
165 r = log_oom();
166 break;
167 }
168
169 q = memchr(p, '=', e - p);
170 if (q) {
171 if (journal_field_valid(p, q - p, false)) {
172 size_t l;
173
174 l = e - p;
175
176 /* If the field name starts with an underscore, skip the variable, since that indicates
177 * a trusted field */
178 iovec[n++] = IOVEC_MAKE((char*) p, l);
179 entry_size += l;
180
181 server_process_entry_meta(p, l, ucred,
182 &priority,
183 &identifier,
184 &message,
185 &object_pid);
186 }
187
188 *remaining -= (e - p) + 1;
189 p = e + 1;
190 continue;
191 } else {
192 uint64_t l;
193 char *k;
194
195 if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
196 log_debug("Failed to parse message, ignoring.");
197 break;
198 }
199
200 l = unaligned_read_le64(e + 1);
201
202 if (l > DATA_SIZE_MAX) {
203 log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
204 break;
205 }
206
207 if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
208 e[1+sizeof(uint64_t)+l] != '\n') {
209 log_debug("Failed to parse message, ignoring.");
210 break;
211 }
212
213 k = malloc((e - p) + 1 + l);
214 if (!k) {
215 log_oom();
216 break;
217 }
218
219 memcpy(k, p, e - p);
220 k[e - p] = '=';
221 memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
222
223 if (journal_field_valid(p, e - p, false)) {
224 iovec[n].iov_base = k;
225 iovec[n].iov_len = (e - p) + 1 + l;
226 entry_size += iovec[n].iov_len;
227 n++;
228
229 server_process_entry_meta(k, (e - p) + 1 + l, ucred,
230 &priority,
231 &identifier,
232 &message,
233 &object_pid);
234 } else
235 free(k);
236
237 *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
238 p = e + 1 + sizeof(uint64_t) + l + 1;
239 }
240 }
241
242 if (n <= 0) {
243 r = 1;
244 goto finish;
245 }
246
247 if (!client_context_test_priority(context, priority)) {
248 r = 0;
249 goto finish;
250 }
251
252 tn = n++;
253 iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
254 entry_size += strlen("_TRANSPORT=journal");
255
256 if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
257 log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
258 goto finish;
259 }
260
261 if (message) {
262 if (s->forward_to_syslog)
263 server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
264
265 if (s->forward_to_kmsg)
266 server_forward_kmsg(s, priority, identifier, message, ucred);
267
268 if (s->forward_to_console)
269 server_forward_console(s, priority, identifier, message, ucred);
270
271 if (s->forward_to_wall)
272 server_forward_wall(s, priority, identifier, message, ucred);
273 }
274
275 server_dispatch_message(s, iovec, n, m, context, tv, priority, object_pid);
276
277 finish:
278 for (j = 0; j < n; j++) {
279 if (j == tn)
280 continue;
281
282 if (iovec[j].iov_base < buffer ||
283 (const char*) iovec[j].iov_base >= p + *remaining)
284 free(iovec[j].iov_base);
285 }
286
287 free(iovec);
288 free(identifier);
289 free(message);
290
291 return r;
292 }
293
294 void server_process_native_message(
295 Server *s,
296 const void *buffer, size_t buffer_size,
297 const struct ucred *ucred,
298 const struct timeval *tv,
299 const char *label, size_t label_len) {
300
301 size_t remaining = buffer_size;
302 ClientContext *context = NULL;
303 int r;
304
305 assert(s);
306 assert(buffer || buffer_size == 0);
307
308 if (ucred && pid_is_valid(ucred->pid)) {
309 r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
310 if (r < 0)
311 log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
312 }
313
314 do {
315 r = server_process_entry(s,
316 (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
317 context, ucred, tv, label, label_len);
318 } while (r == 0);
319 }
320
321 void server_process_native_file(
322 Server *s,
323 int fd,
324 const struct ucred *ucred,
325 const struct timeval *tv,
326 const char *label, size_t label_len) {
327
328 struct stat st;
329 bool sealed;
330 int r;
331
332 /* Data is in the passed fd, since it didn't fit in a
333 * datagram. */
334
335 assert(s);
336 assert(fd >= 0);
337
338 /* If it's a memfd, check if it is sealed. If so, we can just
339 * use map it and use it, and do not need to copy the data
340 * out. */
341 sealed = memfd_get_sealed(fd) > 0;
342
343 if (!sealed && (!ucred || ucred->uid != 0)) {
344 _cleanup_free_ char *sl = NULL, *k = NULL;
345 const char *e;
346
347 /* If this is not a sealed memfd, and the peer is unknown or
348 * unprivileged, then verify the path. */
349
350 if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
351 log_oom();
352 return;
353 }
354
355 r = readlink_malloc(sl, &k);
356 if (r < 0) {
357 log_error_errno(r, "readlink(%s) failed: %m", sl);
358 return;
359 }
360
361 e = path_startswith(k, "/dev/shm/");
362 if (!e)
363 e = path_startswith(k, "/tmp/");
364 if (!e)
365 e = path_startswith(k, "/var/tmp/");
366 if (!e) {
367 log_error("Received file outside of allowed directories. Refusing.");
368 return;
369 }
370
371 if (!filename_is_valid(e)) {
372 log_error("Received file in subdirectory of allowed directories. Refusing.");
373 return;
374 }
375 }
376
377 if (fstat(fd, &st) < 0) {
378 log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
379 return;
380 }
381
382 if (!S_ISREG(st.st_mode)) {
383 log_error("File passed is not regular. Ignoring.");
384 return;
385 }
386
387 if (st.st_size <= 0)
388 return;
389
390 if (st.st_size > ENTRY_SIZE_MAX) {
391 log_error("File passed too large. Ignoring.");
392 return;
393 }
394
395 if (sealed) {
396 void *p;
397 size_t ps;
398
399 /* The file is sealed, we can just map it and use it. */
400
401 ps = PAGE_ALIGN(st.st_size);
402 p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
403 if (p == MAP_FAILED) {
404 log_error_errno(errno, "Failed to map memfd, ignoring: %m");
405 return;
406 }
407
408 server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
409 assert_se(munmap(p, ps) >= 0);
410 } else {
411 _cleanup_free_ void *p = NULL;
412 struct statvfs vfs;
413 ssize_t n;
414
415 if (fstatvfs(fd, &vfs) < 0) {
416 log_error_errno(errno, "Failed to stat file system of passed file, ignoring: %m");
417 return;
418 }
419
420 /* Refuse operating on file systems that have
421 * mandatory locking enabled, see:
422 *
423 * https://github.com/systemd/systemd/issues/1822
424 */
425 if (vfs.f_flag & ST_MANDLOCK) {
426 log_error("Received file descriptor from file system with mandatory locking enable, refusing.");
427 return;
428 }
429
430 /* Make the fd non-blocking. On regular files this has
431 * the effect of bypassing mandatory locking. Of
432 * course, this should normally not be necessary given
433 * the check above, but let's better be safe than
434 * sorry, after all NFS is pretty confusing regarding
435 * file system flags, and we better don't trust it,
436 * and so is SMB. */
437 r = fd_nonblock(fd, true);
438 if (r < 0) {
439 log_error_errno(r, "Failed to make fd non-blocking, ignoring: %m");
440 return;
441 }
442
443 /* The file is not sealed, we can't map the file here, since
444 * clients might then truncate it and trigger a SIGBUS for
445 * us. So let's stupidly read it */
446
447 p = malloc(st.st_size);
448 if (!p) {
449 log_oom();
450 return;
451 }
452
453 n = pread(fd, p, st.st_size, 0);
454 if (n < 0)
455 log_error_errno(errno, "Failed to read file, ignoring: %m");
456 else if (n > 0)
457 server_process_native_message(s, p, n, ucred, tv, label, label_len);
458 }
459 }
460
461 int server_open_native_socket(Server*s) {
462
463 static const union sockaddr_union sa = {
464 .un.sun_family = AF_UNIX,
465 .un.sun_path = "/run/systemd/journal/socket",
466 };
467 static const int one = 1;
468 int r;
469
470 assert(s);
471
472 if (s->native_fd < 0) {
473 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
474 if (s->native_fd < 0)
475 return log_error_errno(errno, "socket() failed: %m");
476
477 (void) unlink(sa.un.sun_path);
478
479 r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
480 if (r < 0)
481 return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
482
483 (void) chmod(sa.un.sun_path, 0666);
484 } else
485 fd_nonblock(s->native_fd, 1);
486
487 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
488 if (r < 0)
489 return log_error_errno(errno, "SO_PASSCRED failed: %m");
490
491 #if HAVE_SELINUX
492 if (mac_selinux_use()) {
493 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
494 if (r < 0)
495 log_warning_errno(errno, "SO_PASSSEC failed: %m");
496 }
497 #endif
498
499 r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
500 if (r < 0)
501 return log_error_errno(errno, "SO_TIMESTAMP failed: %m");
502
503 r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
504 if (r < 0)
505 return log_error_errno(r, "Failed to add native server fd to event loop: %m");
506
507 r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
508 if (r < 0)
509 return log_error_errno(r, "Failed to adjust native event source priority: %m");
510
511 return 0;
512 }