]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-context.c
do not filter out deprecated USER audit messages
[thirdparty/systemd.git] / src / journal / journald-context.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if HAVE_SELINUX
4 #include <selinux/selinux.h>
5 #endif
6
7 #include "alloc-util.h"
8 #include "audit-util.h"
9 #include "cgroup-util.h"
10 #include "fd-util.h"
11 #include "fileio.h"
12 #include "fs-util.h"
13 #include "io-util.h"
14 #include "journal-util.h"
15 #include "journald-context.h"
16 #include "process-util.h"
17 #include "string-util.h"
18 #include "syslog-util.h"
19 #include "unaligned.h"
20 #include "user-util.h"
21
22 /* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
23 * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
24 * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time than the
25 * log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
26 *
27 * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
28 * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
29 * are never used (a sad attempt to deal with the UNIX weakness of PIDs reuse), cache entries older than 1s are
30 * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
31 * flushed out). Data newer than 1s is used immediately without refresh.
32 *
33 * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
34 * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
35 * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
36 *
37 * Caching metadata like this has two major benefits:
38 *
39 * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
40 *
41 * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
42 * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating
43 * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
44 * stream connection. This should improve cases where a service process logs immediately before exiting and we
45 * previously had trouble associating the log message with the service.
46 *
47 * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of
48 * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older
49 * and sometimes slightly newer than what was current at the log event).
50 */
51
52 /* We refresh every 1s */
53 #define REFRESH_USEC (1*USEC_PER_SEC)
54
55 /* Data older than 5s we flush out */
56 #define MAX_USEC (5*USEC_PER_SEC)
57
58 /* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
59 * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
60 * clients itself is limited.) */
61 #define CACHE_MAX (16*1024)
62
63 static int client_context_compare(const void *a, const void *b) {
64 const ClientContext *x = a, *y = b;
65
66 if (x->timestamp < y->timestamp)
67 return -1;
68 if (x->timestamp > y->timestamp)
69 return 1;
70
71 if (x->pid < y->pid)
72 return -1;
73 if (x->pid > y->pid)
74 return 1;
75
76 return 0;
77 }
78
79 static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
80 ClientContext *c;
81 int r;
82
83 assert(s);
84 assert(pid_is_valid(pid));
85 assert(ret);
86
87 r = hashmap_ensure_allocated(&s->client_contexts, NULL);
88 if (r < 0)
89 return r;
90
91 r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
92 if (r < 0)
93 return r;
94
95 c = new0(ClientContext, 1);
96 if (!c)
97 return -ENOMEM;
98
99 c->pid = pid;
100
101 c->uid = UID_INVALID;
102 c->gid = GID_INVALID;
103 c->auditid = AUDIT_SESSION_INVALID;
104 c->loginuid = UID_INVALID;
105 c->owner_uid = UID_INVALID;
106 c->lru_index = PRIOQ_IDX_NULL;
107 c->timestamp = USEC_INFINITY;
108 c->extra_fields_mtime = NSEC_INFINITY;
109 c->log_level_max = -1;
110
111 r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c);
112 if (r < 0) {
113 free(c);
114 return r;
115 }
116
117 *ret = c;
118 return 0;
119 }
120
121 static void client_context_reset(ClientContext *c) {
122 assert(c);
123
124 c->timestamp = USEC_INFINITY;
125
126 c->uid = UID_INVALID;
127 c->gid = GID_INVALID;
128
129 c->comm = mfree(c->comm);
130 c->exe = mfree(c->exe);
131 c->cmdline = mfree(c->cmdline);
132 c->capeff = mfree(c->capeff);
133
134 c->auditid = AUDIT_SESSION_INVALID;
135 c->loginuid = UID_INVALID;
136
137 c->cgroup = mfree(c->cgroup);
138 c->session = mfree(c->session);
139 c->owner_uid = UID_INVALID;
140 c->unit = mfree(c->unit);
141 c->user_unit = mfree(c->user_unit);
142 c->slice = mfree(c->slice);
143 c->user_slice = mfree(c->user_slice);
144
145 c->invocation_id = SD_ID128_NULL;
146
147 c->label = mfree(c->label);
148 c->label_size = 0;
149
150 c->extra_fields_iovec = mfree(c->extra_fields_iovec);
151 c->extra_fields_n_iovec = 0;
152 c->extra_fields_data = mfree(c->extra_fields_data);
153 c->extra_fields_mtime = NSEC_INFINITY;
154
155 c->log_level_max = -1;
156 }
157
158 static ClientContext* client_context_free(Server *s, ClientContext *c) {
159 assert(s);
160
161 if (!c)
162 return NULL;
163
164 assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);
165
166 if (c->in_lru)
167 assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
168
169 client_context_reset(c);
170
171 return mfree(c);
172 }
173
174 static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) {
175 assert(c);
176 assert(pid_is_valid(c->pid));
177
178 /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */
179 if (ucred && uid_is_valid(ucred->uid))
180 c->uid = ucred->uid;
181 else
182 (void) get_process_uid(c->pid, &c->uid);
183
184 if (ucred && gid_is_valid(ucred->gid))
185 c->gid = ucred->gid;
186 else
187 (void) get_process_gid(c->pid, &c->gid);
188 }
189
190 static void client_context_read_basic(ClientContext *c) {
191 char *t;
192
193 assert(c);
194 assert(pid_is_valid(c->pid));
195
196 if (get_process_comm(c->pid, &t) >= 0)
197 free_and_replace(c->comm, t);
198
199 if (get_process_exe(c->pid, &t) >= 0)
200 free_and_replace(c->exe, t);
201
202 if (get_process_cmdline(c->pid, 0, false, &t) >= 0)
203 free_and_replace(c->cmdline, t);
204
205 if (get_process_capeff(c->pid, &t) >= 0)
206 free_and_replace(c->capeff, t);
207 }
208
209 static int client_context_read_label(
210 ClientContext *c,
211 const char *label, size_t label_size) {
212
213 assert(c);
214 assert(pid_is_valid(c->pid));
215 assert(label_size == 0 || label);
216
217 if (label_size > 0) {
218 char *l;
219
220 /* If we got an SELinux label passed in it counts. */
221
222 l = newdup_suffix0(char, label, label_size);
223 if (!l)
224 return -ENOMEM;
225
226 free_and_replace(c->label, l);
227 c->label_size = label_size;
228 }
229 #if HAVE_SELINUX
230 else {
231 char *con;
232
233 /* If we got no SELinux label passed in, let's try to acquire one */
234
235 if (getpidcon(c->pid, &con) >= 0) {
236 free_and_replace(c->label, con);
237 c->label_size = strlen(c->label);
238 }
239 }
240 #endif
241
242 return 0;
243 }
244
245 static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
246 char *t = NULL;
247 int r;
248
249 assert(c);
250
251 /* Try to acquire the current cgroup path */
252 r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
253 if (r < 0) {
254
255 /* If that didn't work, we use the unit ID passed in as fallback, if we have nothing cached yet */
256 if (unit_id && !c->unit) {
257 c->unit = strdup(unit_id);
258 if (c->unit)
259 return 0;
260 }
261
262 return r;
263 }
264
265 /* Let's shortcut this if the cgroup path didn't change */
266 if (streq_ptr(c->cgroup, t)) {
267 free(t);
268 return 0;
269 }
270
271 free_and_replace(c->cgroup, t);
272
273 (void) cg_path_get_session(c->cgroup, &t);
274 free_and_replace(c->session, t);
275
276 if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
277 c->owner_uid = UID_INVALID;
278
279 (void) cg_path_get_unit(c->cgroup, &t);
280 free_and_replace(c->unit, t);
281
282 (void) cg_path_get_user_unit(c->cgroup, &t);
283 free_and_replace(c->user_unit, t);
284
285 (void) cg_path_get_slice(c->cgroup, &t);
286 free_and_replace(c->slice, t);
287
288 (void) cg_path_get_user_slice(c->cgroup, &t);
289 free_and_replace(c->user_slice, t);
290
291 return 0;
292 }
293
294 static int client_context_read_invocation_id(
295 Server *s,
296 ClientContext *c) {
297
298 _cleanup_free_ char *value = NULL;
299 const char *p;
300 int r;
301
302 assert(s);
303 assert(c);
304
305 /* Read the invocation ID of a unit off a unit. PID 1 stores it in a per-unit symlink in /run/systemd/units/ */
306
307 if (!c->unit)
308 return 0;
309
310 p = strjoina("/run/systemd/units/invocation:", c->unit);
311 r = readlink_malloc(p, &value);
312 if (r < 0)
313 return r;
314
315 return sd_id128_from_string(value, &c->invocation_id);
316 }
317
318 static int client_context_read_log_level_max(
319 Server *s,
320 ClientContext *c) {
321
322 _cleanup_free_ char *value = NULL;
323 const char *p;
324 int r, ll;
325
326 if (!c->unit)
327 return 0;
328
329 p = strjoina("/run/systemd/units/log-level-max:", c->unit);
330 r = readlink_malloc(p, &value);
331 if (r < 0)
332 return r;
333
334 ll = log_level_from_string(value);
335 if (ll < 0)
336 return -EINVAL;
337
338 c->log_level_max = ll;
339 return 0;
340 }
341
342 static int client_context_read_extra_fields(
343 Server *s,
344 ClientContext *c) {
345
346 size_t size = 0, n_iovec = 0, n_allocated = 0, left;
347 _cleanup_free_ struct iovec *iovec = NULL;
348 _cleanup_free_ void *data = NULL;
349 _cleanup_fclose_ FILE *f = NULL;
350 struct stat st;
351 const char *p;
352 uint8_t *q;
353 int r;
354
355 if (!c->unit)
356 return 0;
357
358 p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);
359
360 if (c->extra_fields_mtime != NSEC_INFINITY) {
361 if (stat(p, &st) < 0) {
362 if (errno == ENOENT)
363 return 0;
364
365 return -errno;
366 }
367
368 if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
369 return 0;
370 }
371
372 f = fopen(p, "re");
373 if (!f) {
374 if (errno == ENOENT)
375 return 0;
376
377 return -errno;
378 }
379
380 if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
381 * one, that matches the stuff we are reading */
382 return -errno;
383
384 r = read_full_stream(f, (char**) &data, &size);
385 if (r < 0)
386 return r;
387
388 q = data, left = size;
389 while (left > 0) {
390 uint8_t *field, *eq;
391 uint64_t v, n;
392
393 if (left < sizeof(uint64_t))
394 return -EBADMSG;
395
396 v = unaligned_read_le64(q);
397 if (v < 2)
398 return -EBADMSG;
399
400 n = sizeof(uint64_t) + v;
401 if (left < n)
402 return -EBADMSG;
403
404 field = q + sizeof(uint64_t);
405
406 eq = memchr(field, '=', v);
407 if (!eq)
408 return -EBADMSG;
409
410 if (!journal_field_valid((const char *) field, eq - field, false))
411 return -EBADMSG;
412
413 if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
414 return -ENOMEM;
415
416 iovec[n_iovec++] = IOVEC_MAKE(field, v);
417
418 left -= n, q += n;
419 }
420
421 free(c->extra_fields_iovec);
422 free(c->extra_fields_data);
423
424 c->extra_fields_iovec = TAKE_PTR(iovec);
425 c->extra_fields_n_iovec = n_iovec;
426 c->extra_fields_data = TAKE_PTR(data);
427 c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);
428
429 return 0;
430 }
431
432 static void client_context_really_refresh(
433 Server *s,
434 ClientContext *c,
435 const struct ucred *ucred,
436 const char *label, size_t label_size,
437 const char *unit_id,
438 usec_t timestamp) {
439
440 assert(s);
441 assert(c);
442 assert(pid_is_valid(c->pid));
443
444 if (timestamp == USEC_INFINITY)
445 timestamp = now(CLOCK_MONOTONIC);
446
447 client_context_read_uid_gid(c, ucred);
448 client_context_read_basic(c);
449 (void) client_context_read_label(c, label, label_size);
450
451 (void) audit_session_from_pid(c->pid, &c->auditid);
452 (void) audit_loginuid_from_pid(c->pid, &c->loginuid);
453
454 (void) client_context_read_cgroup(s, c, unit_id);
455 (void) client_context_read_invocation_id(s, c);
456 (void) client_context_read_log_level_max(s, c);
457 (void) client_context_read_extra_fields(s, c);
458
459 c->timestamp = timestamp;
460
461 if (c->in_lru) {
462 assert(c->n_ref == 0);
463 assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
464 }
465 }
466
467 void client_context_maybe_refresh(
468 Server *s,
469 ClientContext *c,
470 const struct ucred *ucred,
471 const char *label, size_t label_size,
472 const char *unit_id,
473 usec_t timestamp) {
474
475 assert(s);
476 assert(c);
477
478 if (timestamp == USEC_INFINITY)
479 timestamp = now(CLOCK_MONOTONIC);
480
481 /* No cached data so far? Let's fill it up */
482 if (c->timestamp == USEC_INFINITY)
483 goto refresh;
484
485 /* If the data isn't pinned and if the cashed data is older than the upper limit, we flush it out
486 * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
487 if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
488 client_context_reset(c);
489 goto refresh;
490 }
491
492 /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
493 if (c->timestamp + REFRESH_USEC < timestamp)
494 goto refresh;
495
496 /* If the data passed along doesn't match the cached data we also do a refresh */
497 if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
498 goto refresh;
499
500 if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
501 goto refresh;
502
503 if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
504 goto refresh;
505
506 return;
507
508 refresh:
509 client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
510 }
511
512 static void client_context_try_shrink_to(Server *s, size_t limit) {
513 assert(s);
514
515 /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
516 * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
517 * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */
518
519 while (hashmap_size(s->client_contexts) > limit) {
520 ClientContext *c;
521
522 c = prioq_pop(s->client_contexts_lru);
523 if (!c)
524 break; /* All remaining entries are pinned, give up */
525
526 assert(c->in_lru);
527 assert(c->n_ref == 0);
528
529 c->in_lru = false;
530
531 client_context_free(s, c);
532 }
533 }
534
535 void client_context_flush_all(Server *s) {
536 assert(s);
537
538 /* Flush out all remaining entries. This assumes all references are already dropped. */
539
540 s->my_context = client_context_release(s, s->my_context);
541 s->pid1_context = client_context_release(s, s->pid1_context);
542
543 client_context_try_shrink_to(s, 0);
544
545 assert(prioq_size(s->client_contexts_lru) == 0);
546 assert(hashmap_size(s->client_contexts) == 0);
547
548 s->client_contexts_lru = prioq_free(s->client_contexts_lru);
549 s->client_contexts = hashmap_free(s->client_contexts);
550 }
551
552 static int client_context_get_internal(
553 Server *s,
554 pid_t pid,
555 const struct ucred *ucred,
556 const char *label, size_t label_len,
557 const char *unit_id,
558 bool add_ref,
559 ClientContext **ret) {
560
561 ClientContext *c;
562 int r;
563
564 assert(s);
565 assert(ret);
566
567 if (!pid_is_valid(pid))
568 return -EINVAL;
569
570 c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
571 if (c) {
572
573 if (add_ref) {
574 if (c->in_lru) {
575 /* The entry wasn't pinned so far, let's remove it from the LRU list then */
576 assert(c->n_ref == 0);
577 assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
578 c->in_lru = false;
579 }
580
581 c->n_ref++;
582 }
583
584 client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
585
586 *ret = c;
587 return 0;
588 }
589
590 client_context_try_shrink_to(s, CACHE_MAX-1);
591
592 r = client_context_new(s, pid, &c);
593 if (r < 0)
594 return r;
595
596 if (add_ref)
597 c->n_ref++;
598 else {
599 r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
600 if (r < 0) {
601 client_context_free(s, c);
602 return r;
603 }
604
605 c->in_lru = true;
606 }
607
608 client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
609
610 *ret = c;
611 return 0;
612 }
613
614 int client_context_get(
615 Server *s,
616 pid_t pid,
617 const struct ucred *ucred,
618 const char *label, size_t label_len,
619 const char *unit_id,
620 ClientContext **ret) {
621
622 return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
623 }
624
625 int client_context_acquire(
626 Server *s,
627 pid_t pid,
628 const struct ucred *ucred,
629 const char *label, size_t label_len,
630 const char *unit_id,
631 ClientContext **ret) {
632
633 return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret);
634 };
635
636 ClientContext *client_context_release(Server *s, ClientContext *c) {
637 assert(s);
638
639 if (!c)
640 return NULL;
641
642 assert(c->n_ref > 0);
643 assert(!c->in_lru);
644
645 c->n_ref--;
646 if (c->n_ref > 0)
647 return NULL;
648
649 /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
650 * right-away */
651
652 if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
653 client_context_free(s, c);
654 else
655 c->in_lru = true;
656
657 return NULL;
658 }
659
660 void client_context_acquire_default(Server *s) {
661 int r;
662
663 assert(s);
664
665 /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
666 * generate driver messages. */
667
668 if (!s->my_context) {
669 struct ucred ucred = {
670 .pid = getpid_cached(),
671 .uid = getuid(),
672 .gid = getgid(),
673 };
674
675 r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
676 if (r < 0)
677 log_warning_errno(r, "Failed to acquire our own context, ignoring: %m");
678 }
679
680 if (!s->pid1_context) {
681
682 r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
683 if (r < 0)
684 log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m");
685
686 }
687 }