]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
22e3a02b | 2 | |
349cc4a5 | 3 | #if HAVE_SELINUX |
22e3a02b LP |
4 | #include <selinux/selinux.h> |
5 | #endif | |
6 | ||
7 | #include "alloc-util.h" | |
8 | #include "audit-util.h" | |
9 | #include "cgroup-util.h" | |
21c491e1 | 10 | #include "env-util.h" |
d3070fbd LP |
11 | #include "fd-util.h" |
12 | #include "fileio.h" | |
13 | #include "fs-util.h" | |
14 | #include "io-util.h" | |
15 | #include "journal-util.h" | |
22e3a02b | 16 | #include "journald-context.h" |
90fc172e | 17 | #include "parse-util.h" |
672773b6 | 18 | #include "path-util.h" |
22e3a02b | 19 | #include "process-util.h" |
b12a4808 | 20 | #include "procfs-util.h" |
22e3a02b | 21 | #include "string-util.h" |
d3070fbd LP |
22 | #include "syslog-util.h" |
23 | #include "unaligned.h" | |
22e3a02b LP |
24 | #include "user-util.h" |
25 | ||
26 | /* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc | |
27 | * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we | |
28 | * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time the | |
29 | * log entry was originally created. We hence just increase the "window of inaccuracy" a bit. | |
30 | * | |
31 | * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed | |
32 | * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s | |
33 | are never used (a sad attempt to deal with the UNIX weakness of PID reuse), cache entries older than 1s are |
34 | * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not | |
35 | * flushed out). Data newer than 1s is used immediately without refresh. | |
36 | * | |
37 | * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry | |
38 | * as long as their socket is connected. Note that cache entries are shared between different transports. That means a | |
39 | * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols. | |
40 | * | |
41 | * Caching metadata like this has two major benefits: | |
42 | * | |
43 | * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood. | |
44 | * | |
45 | * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache | |
46 | * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating | |
47 | * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a | |
48 | * stream connection. This should improve cases where a service process logs immediately before exiting and we | |
49 | * previously had trouble associating the log message with the service. | |
50 | * | |
51 | * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of | |
5238e957 | 52 | * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older |
22e3a02b LP |
53 | * and sometimes slightly newer than what was current at the log event). |
54 | */ | |
55 | ||
56 | /* We refresh every 1s */ | |
57 | #define REFRESH_USEC (1*USEC_PER_SEC) | |
58 | ||
59 | /* Data older than 5s we flush out */ | |
60 | #define MAX_USEC (5*USEC_PER_SEC) | |
61 | ||
62 | /* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in | |
63 | * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream | |
64 | * clients itself is limited.) */ | |
b12a4808 ZJS |
65 | #define CACHE_MAX_FALLBACK 128U |
66 | #define CACHE_MAX_MAX (16*1024U) | |
67 | #define CACHE_MAX_MIN 64U | |
68 | ||
69 | static size_t cache_max(void) { | |
70 | static size_t cached = -1; | |
71 | ||
f5fbe71d | 72 | if (cached == SIZE_MAX) { |
b12a4808 ZJS |
73 | uint64_t mem_total; |
74 | int r; | |
75 | ||
76 | r = procfs_memory_get(&mem_total, NULL); | |
77 | if (r < 0) { | |
78 | log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m"); | |
79 | cached = CACHE_MAX_FALLBACK; | |
21c491e1 | 80 | } else |
b12a4808 ZJS |
81 | /* Cache entries are usually a few kB, but the process cmdline is controlled by the |
82 | * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to | |
83 | * 1/8th of memory may be used by the cache. | |
84 | * | |
85 | * In the common case, this formula gives 64 cache entries for each GB of RAM. | |
86 | */ | |
21c491e1 | 87 | cached = CLAMP(mem_total / 8 / sc_arg_max(), CACHE_MAX_MIN, CACHE_MAX_MAX); |
b12a4808 ZJS |
88 | } |
89 | ||
90 | return cached; | |
91 | } | |
22e3a02b LP |
92 | |
93 | static int client_context_compare(const void *a, const void *b) { | |
94 | const ClientContext *x = a, *y = b; | |
9c57a73b | 95 | int r; |
22e3a02b | 96 | |
9c57a73b YW |
97 | r = CMP(x->timestamp, y->timestamp); |
98 | if (r != 0) | |
99 | return r; | |
22e3a02b | 100 | |
9c57a73b | 101 | return CMP(x->pid, y->pid); |
22e3a02b LP |
102 | } |
103 | ||
/* Allocates a fresh cache entry for the given PID, with every metadata field set
 * to its "unset" sentinel, and registers it in s->client_contexts keyed by PID.
 * On success ownership passes to the hashmap and *ret points at the entry.
 * Returns 0 on success, a negative errno-style error on failure. */
static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
        _cleanup_free_ ClientContext *c = NULL;
        int r;

        assert(s);
        assert(pid_is_valid(pid));
        assert(ret);

        /* Make sure the LRU prioq exists up-front, so later prioq_put() on this entry can't
         * fail on allocation of the queue itself */
        r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
        if (r < 0)
                return r;

        c = new(ClientContext, 1);
        if (!c)
                return -ENOMEM;

        *c = (ClientContext) {
                .pid = pid,
                .uid = UID_INVALID,
                .gid = GID_INVALID,
                .auditid = AUDIT_SESSION_INVALID,
                .loginuid = UID_INVALID,
                .owner_uid = UID_INVALID,
                .lru_index = PRIOQ_IDX_NULL,
                .timestamp = USEC_INFINITY,
                .extra_fields_mtime = NSEC_INFINITY,
                .log_level_max = -1,
                /* Per-client ratelimit settings start out as the server-wide defaults */
                .log_ratelimit_interval = s->ratelimit_interval,
                .log_ratelimit_burst = s->ratelimit_burst,
        };

        r = hashmap_ensure_put(&s->client_contexts, NULL, PID_TO_PTR(pid), c);
        if (r < 0)
                return r;

        /* The hashmap owns the entry now; disarm the cleanup handler */
        *ret = TAKE_PTR(c);
        return 0;
}
142 | ||
90fc172e AZ |
/* Resets every cached metadata field of the entry back to its "unset" sentinel,
 * freeing all owned strings/buffers. The entry itself stays allocated and keeps
 * its PID, refcount and LRU bookkeeping — only the cached data is dropped. */
static void client_context_reset(Server *s, ClientContext *c) {
        assert(s);
        assert(c);

        c->timestamp = USEC_INFINITY;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;

        c->comm = mfree(c->comm);
        c->exe = mfree(c->exe);
        c->cmdline = mfree(c->cmdline);
        c->capeff = mfree(c->capeff);

        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;

        c->cgroup = mfree(c->cgroup);
        c->session = mfree(c->session);
        c->owner_uid = UID_INVALID;
        c->unit = mfree(c->unit);
        c->user_unit = mfree(c->user_unit);
        c->slice = mfree(c->slice);
        c->user_slice = mfree(c->user_slice);

        c->invocation_id = SD_ID128_NULL;

        c->label = mfree(c->label);
        c->label_size = 0;

        c->extra_fields_iovec = mfree(c->extra_fields_iovec);
        c->extra_fields_n_iovec = 0;
        c->extra_fields_data = mfree(c->extra_fields_data);
        c->extra_fields_mtime = NSEC_INFINITY;

        c->log_level_max = -1;

        /* Ratelimit settings revert to the server-wide defaults */
        c->log_ratelimit_interval = s->ratelimit_interval;
        c->log_ratelimit_burst = s->ratelimit_burst;
}
183 | ||
/* Unregisters the entry from the hashmap (and the LRU prioq if queued there),
 * releases all cached data and frees the entry itself. NULL-tolerant; always
 * returns NULL so callers can use the `p = client_context_free(s, p)` idiom. */
static ClientContext* client_context_free(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        /* The hashmap must contain exactly this entry under its PID */
        assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);

        if (c->in_lru)
                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);

        /* Frees all owned strings/buffers before we free the struct itself */
        client_context_reset(s, c);

        return mfree(c);
}
199 | ||
200 | static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) { | |
201 | assert(c); | |
202 | assert(pid_is_valid(c->pid)); | |
203 | ||
204 | /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */ | |
205 | if (ucred && uid_is_valid(ucred->uid)) | |
206 | c->uid = ucred->uid; | |
207 | else | |
208 | (void) get_process_uid(c->pid, &c->uid); | |
209 | ||
210 | if (ucred && gid_is_valid(ucred->gid)) | |
211 | c->gid = ucred->gid; | |
212 | else | |
213 | (void) get_process_gid(c->pid, &c->gid); | |
214 | } | |
215 | ||
216 | static void client_context_read_basic(ClientContext *c) { | |
217 | char *t; | |
218 | ||
219 | assert(c); | |
220 | assert(pid_is_valid(c->pid)); | |
221 | ||
222 | if (get_process_comm(c->pid, &t) >= 0) | |
223 | free_and_replace(c->comm, t); | |
224 | ||
225 | if (get_process_exe(c->pid, &t) >= 0) | |
226 | free_and_replace(c->exe, t); | |
227 | ||
2f960b38 | 228 | if (get_process_cmdline(c->pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE, &t) >= 0) |
22e3a02b LP |
229 | free_and_replace(c->cmdline, t); |
230 | ||
231 | if (get_process_capeff(c->pid, &t) >= 0) | |
232 | free_and_replace(c->capeff, t); | |
233 | } | |
234 | ||
/* Updates the cached SELinux label. A label explicitly passed in (e.g. received
 * via SCM_SECURITY on the socket) always wins; only when none was passed do we
 * fall back to querying the kernel via getpidcon() (SELinux builds only).
 * Returns 0 on success (including "nothing to do"), -ENOMEM on allocation
 * failure. */
static int client_context_read_label(
                ClientContext *c,
                const char *label, size_t label_size) {

        assert(c);
        assert(pid_is_valid(c->pid));
        assert(label_size == 0 || label);

        if (label_size > 0) {
                char *l;

                /* If we got an SELinux label passed in it counts. */

                /* The passed-in label need not be NUL-terminated; duplicate with a
                 * terminator appended */
                l = newdup_suffix0(char, label, label_size);
                if (!l)
                        return -ENOMEM;

                free_and_replace(c->label, l);
                c->label_size = label_size;
        }
#if HAVE_SELINUX
        else {
                char *con;

                /* If we got no SELinux label passed in, let's try to acquire one */

                if (getpidcon(c->pid, &con) >= 0) {
                        free_and_replace(c->label, con);
                        c->label_size = strlen(c->label);
                }
        }
#endif

        return 0;
}
270 | ||
/* Refreshes all cgroup-derived metadata (cgroup path, session, owner UID, unit,
 * user unit, slice, user slice). If the cgroup path cannot be determined or the
 * process sits in the root cgroup, falls back to the caller-supplied unit_id (if
 * any) for the unit field only. Returns 0 on success or if the cgroup path is
 * unchanged; otherwise a negative errno-style error. */
static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(c);

        /* Try to acquire the current cgroup path */
        r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
        if (r < 0 || empty_or_root(t)) {
                /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
                 * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
                 * on cgroup v1 and we want to be able to map log messages from them too. */
                if (unit_id && !c->unit) {
                        c->unit = strdup(unit_id);
                        if (c->unit)
                                return 0;
                }

                return r;
        }

        /* Let's shortcut this if the cgroup path didn't change */
        if (streq_ptr(c->cgroup, t))
                return 0;

        free_and_replace(c->cgroup, t);

        /* Each helper below allocates into t (or leaves it NULL on failure), which we
         * then move into the respective field — a failed lookup clears that field */
        (void) cg_path_get_session(c->cgroup, &t);
        free_and_replace(c->session, t);

        if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
                c->owner_uid = UID_INVALID;

        (void) cg_path_get_unit(c->cgroup, &t);
        free_and_replace(c->unit, t);

        (void) cg_path_get_user_unit(c->cgroup, &t);
        free_and_replace(c->user_unit, t);

        (void) cg_path_get_slice(c->cgroup, &t);
        free_and_replace(c->slice, t);

        (void) cg_path_get_user_slice(c->cgroup, &t);
        free_and_replace(c->user_slice, t);

        return 0;
}
318 | ||
/* Reads the invocation ID of the unit the client belongs to, by resolving the
 * per-unit "invocation:" symlink the service manager maintains. No-op if no
 * unit is known. Returns 0 on success or nothing-to-do, negative errno-style
 * error on failure (including unparsable link contents). */
static int client_context_read_invocation_id(
                Server *s,
                ClientContext *c) {

        _cleanup_free_ char *p = NULL, *value = NULL;
        int r;

        assert(s);
        assert(c);

        /* Read the invocation ID of a unit off a unit.
         * PID 1 stores it in a per-unit symlink in /run/systemd/units/
         * User managers store it in a per-unit symlink under /run/user/<uid>/systemd/units/ */

        if (!c->unit)
                return 0;

        if (c->user_unit) {
                /* owner_uid identifies whose user manager's runtime directory to consult */
                r = asprintf(&p, "/run/user/" UID_FMT "/systemd/units/invocation:%s", c->owner_uid, c->user_unit);
                if (r < 0)
                        return r;
        } else {
                p = strjoin("/run/systemd/units/invocation:", c->unit);
                if (!p)
                        return -ENOMEM;
        }

        /* The ID is encoded as the symlink target, not as file contents */
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        return sd_id128_from_string(value, &c->invocation_id);
}
22e3a02b | 352 | |
d3070fbd LP |
353 | static int client_context_read_log_level_max( |
354 | Server *s, | |
355 | ClientContext *c) { | |
22e3a02b | 356 | |
d3070fbd LP |
357 | _cleanup_free_ char *value = NULL; |
358 | const char *p; | |
359 | int r, ll; | |
360 | ||
361 | if (!c->unit) | |
362 | return 0; | |
363 | ||
364 | p = strjoina("/run/systemd/units/log-level-max:", c->unit); | |
365 | r = readlink_malloc(p, &value); | |
22e3a02b LP |
366 | if (r < 0) |
367 | return r; | |
d3070fbd LP |
368 | |
369 | ll = log_level_from_string(value); | |
370 | if (ll < 0) | |
7211c853 | 371 | return ll; |
22e3a02b | 372 | |
d3070fbd LP |
373 | c->log_level_max = ll; |
374 | return 0; | |
375 | } | |
376 | ||
/* Reads the per-unit LogExtraFields= data from the "log-extra-fields:" file in
 * /run/systemd/units/ and caches it as an iovec array pointing into one owned
 * data buffer. The file is a sequence of records, each a little-endian 64-bit
 * length followed by that many bytes of "FIELD=value" payload. The file's mtime
 * is cached so unchanged files are skipped on refresh. Returns 0 on success or
 * nothing-to-do, -EBADMSG on malformed data, other negative errno-style errors
 * on I/O failure. */
static int client_context_read_extra_fields(
                Server *s,
                ClientContext *c) {

        _cleanup_free_ struct iovec *iovec = NULL;
        size_t size = 0, n_iovec = 0, left;
        _cleanup_free_ void *data = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        struct stat st;
        const char *p;
        uint8_t *q;
        int r;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);

        /* If we cached this before, skip re-reading when the mtime is unchanged */
        if (c->extra_fields_mtime != NSEC_INFINITY) {
                if (stat(p, &st) < 0) {
                        if (errno == ENOENT)
                                return 0;

                        return -errno;
                }

                if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
                        return 0;
        }

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return 0;

                return -errno;
        }

        if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
                                        * one, that matches the stuff we are reading */
                return -errno;

        r = read_full_stream(f, (char**) &data, &size);
        if (r < 0)
                return r;

        /* Walk the record stream: q is the cursor, left the remaining byte count */
        q = data, left = size;
        while (left > 0) {
                uint8_t *field, *eq;
                uint64_t v, n;

                if (left < sizeof(uint64_t))
                        return -EBADMSG;

                v = unaligned_read_le64(q);
                if (v < 2) /* shortest valid payload is "X=" */
                        return -EBADMSG;

                n = sizeof(uint64_t) + v; /* total record size: length header + payload */
                if (left < n)
                        return -EBADMSG;

                field = q + sizeof(uint64_t);

                eq = memchr(field, '=', v);
                if (!eq)
                        return -EBADMSG;

                if (!journal_field_valid((const char *) field, eq - field, false))
                        return -EBADMSG;

                if (!GREEDY_REALLOC(iovec, n_iovec+1))
                        return -ENOMEM;

                /* The iovec references the data buffer directly; both are kept alive together */
                iovec[n_iovec++] = IOVEC_MAKE(field, v);

                left -= n, q += n;
        }

        /* Only after the whole file parsed cleanly do we replace the cached copy */
        free(c->extra_fields_iovec);
        free(c->extra_fields_data);

        c->extra_fields_iovec = TAKE_PTR(iovec);
        c->extra_fields_n_iovec = n_iovec;
        c->extra_fields_data = TAKE_PTR(data);
        c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);

        return 0;
}
466 | ||
5ac1530e | 467 | static int client_context_read_log_ratelimit_interval(ClientContext *c) { |
90fc172e AZ |
468 | _cleanup_free_ char *value = NULL; |
469 | const char *p; | |
470 | int r; | |
471 | ||
472 | assert(c); | |
473 | ||
474 | if (!c->unit) | |
475 | return 0; | |
476 | ||
477 | p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit); | |
478 | r = readlink_malloc(p, &value); | |
479 | if (r < 0) | |
480 | return r; | |
481 | ||
5ac1530e | 482 | return safe_atou64(value, &c->log_ratelimit_interval); |
90fc172e AZ |
483 | } |
484 | ||
5ac1530e | 485 | static int client_context_read_log_ratelimit_burst(ClientContext *c) { |
90fc172e AZ |
486 | _cleanup_free_ char *value = NULL; |
487 | const char *p; | |
488 | int r; | |
489 | ||
490 | assert(c); | |
491 | ||
492 | if (!c->unit) | |
493 | return 0; | |
494 | ||
495 | p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit); | |
496 | r = readlink_malloc(p, &value); | |
497 | if (r < 0) | |
498 | return r; | |
499 | ||
5ac1530e | 500 | return safe_atou(value, &c->log_ratelimit_burst); |
90fc172e AZ |
501 | } |
502 | ||
22e3a02b LP |
/* Unconditionally refreshes all cached metadata of the entry: UID/GID (from the
 * passed-in ucred or /proc), basic process data, SELinux label, audit fields,
 * cgroup-derived data and per-unit settings. Individual read failures are
 * tolerated — stale values simply remain. Finally stamps the entry with the
 * given timestamp (now() if USEC_INFINITY) and reshuffles it in the LRU queue
 * if it is queued there. */
static void client_context_really_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);
        assert(pid_is_valid(c->pid));

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        client_context_read_uid_gid(c, ucred);
        client_context_read_basic(c);
        (void) client_context_read_label(c, label, label_size);

        (void) audit_session_from_pid(c->pid, &c->auditid);
        (void) audit_loginuid_from_pid(c->pid, &c->loginuid);

        /* Cgroup data must be read before the per-unit settings below, which key off c->unit */
        (void) client_context_read_cgroup(s, c, unit_id);
        (void) client_context_read_invocation_id(s, c);
        (void) client_context_read_log_level_max(s, c);
        (void) client_context_read_extra_fields(s, c);
        (void) client_context_read_log_ratelimit_interval(c);
        (void) client_context_read_log_ratelimit_burst(c);

        c->timestamp = timestamp;

        if (c->in_lru) {
                /* Only unpinned entries live in the LRU queue; the new timestamp changes their order */
                assert(c->n_ref == 0);
                assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
        }
}
539 | ||
/* Refreshes the entry's cached metadata only when necessary: when nothing is
 * cached yet, when the cache is older than REFRESH_USEC (incremental refresh)
 * or MAX_USEC (full reset first, unpinned entries only), or when the passed-in
 * credentials/label contradict the cached values (possible PID reuse). */
void client_context_maybe_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        /* No cached data so far? Let's fill it up */
        if (c->timestamp == USEC_INFINITY)
                goto refresh;

        /* If the data isn't pinned and if the cached data is older than the upper limit, we flush it out
         * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
        if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
                client_context_reset(s, c);
                goto refresh;
        }

        /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
        if (c->timestamp + REFRESH_USEC < timestamp)
                goto refresh;

        /* If the data passed along doesn't match the cached data we also do a refresh */
        if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
                goto refresh;

        if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
                goto refresh;

        if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
                goto refresh;

        return;

refresh:
        client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
}
584 | ||
/* Shrinks the cache towards the given entry-count limit. First (at most once
 * per MAX_USEC) drops entries whose PID no longer exists, then evicts unpinned
 * entries in LRU order until the limit is met. Pinned entries (n_ref > 0) are
 * never evicted, so the cache may legitimately stay above the limit. */
static void client_context_try_shrink_to(Server *s, size_t limit) {
        ClientContext *c;
        usec_t t;

        assert(s);

        /* Flush any cache entries for PIDs that have already moved on. Don't do this
         * too often, since it's a slow process. */
        t = now(CLOCK_MONOTONIC);
        if (s->last_cache_pid_flush + MAX_USEC < t) {
                unsigned n = prioq_size(s->client_contexts_lru), idx = 0;

                /* We do a number of iterations based on the initial size of the prioq. When we remove an
                 * item, a new item is moved into its places, and items to the right might be reshuffled.
                 */
                for (unsigned i = 0; i < n; i++) {
                        /* idx stays within the shrinking queue: idx = i - removals < n - removals = size */
                        c = prioq_peek_by_index(s->client_contexts_lru, idx);

                        /* Queued entries are by definition unpinned */
                        assert(c->n_ref == 0);

                        if (!pid_is_unwaited(c->pid))
                                client_context_free(s, c);
                        else
                                idx ++;
                }

                s->last_cache_pid_flush = t;
        }

        /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
         * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
         * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */

        while (hashmap_size(s->client_contexts) > limit) {
                c = prioq_pop(s->client_contexts_lru);
                if (!c)
                        break; /* All remaining entries are pinned, give up */

                assert(c->in_lru);
                assert(c->n_ref == 0);

                c->in_lru = false;

                client_context_free(s, c);
        }
}
631 | ||
/* Drops every remaining cache entry and frees the cache data structures. Must
 * only be called once all external references have been released; the two
 * contexts we pin ourselves (our own and PID 1's) are released here first. */
void client_context_flush_all(Server *s) {
        assert(s);

        /* Flush out all remaining entries. This assumes all references are already dropped. */

        s->my_context = client_context_release(s, s->my_context);
        s->pid1_context = client_context_release(s, s->pid1_context);

        /* Limit 0 — evict everything that isn't pinned (which should now be everything) */
        client_context_try_shrink_to(s, 0);

        assert(prioq_size(s->client_contexts_lru) == 0);
        assert(hashmap_size(s->client_contexts) == 0);

        s->client_contexts_lru = prioq_free(s->client_contexts_lru);
        s->client_contexts = hashmap_free(s->client_contexts);
}
648 | ||
/* Looks up (or creates) the cache entry for the given PID and refreshes it as
 * needed. If add_ref is true the entry is pinned (removed from the LRU queue,
 * refcount bumped); otherwise it is kept on the LRU queue for eviction under
 * cache pressure. On success *ret points at the entry (owned by the cache).
 * Returns 0 on success, negative errno-style error on failure. */
static int client_context_get_internal(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                bool add_ref,
                ClientContext **ret) {

        ClientContext *c;
        int r;

        assert(s);
        assert(ret);

        if (!pid_is_valid(pid))
                return -EINVAL;

        c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
        if (c) {

                if (add_ref) {
                        if (c->in_lru) {
                                /* The entry wasn't pinned so far, let's remove it from the LRU list then */
                                assert(c->n_ref == 0);
                                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
                                c->in_lru = false;
                        }

                        c->n_ref++;
                }

                client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

                *ret = c;
                return 0;
        }

        /* Make room for the entry we are about to create */
        client_context_try_shrink_to(s, cache_max()-1);

        r = client_context_new(s, pid, &c);
        if (r < 0)
                return r;

        if (add_ref)
                c->n_ref++;
        else {
                r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
                if (r < 0) {
                        client_context_free(s, c);
                        return r;
                }

                c->in_lru = true;
        }

        /* Fresh entry — fill it unconditionally */
        client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

        *ret = c;
        return 0;
}
710 | ||
/* Convenience wrapper: look up/create the context for a PID without pinning it.
 * The returned entry remains owned by the cache and may be evicted later. */
int client_context_get(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                ClientContext **ret) {

        return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
}
721 | ||
722 | int client_context_acquire( | |
723 | Server *s, | |
724 | pid_t pid, | |
725 | const struct ucred *ucred, | |
726 | const char *label, size_t label_len, | |
727 | const char *unit_id, | |
728 | ClientContext **ret) { | |
729 | ||
730 | return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret); | |
731 | }; | |
732 | ||
/* Drops one pin reference from the entry. When the last reference goes away the
 * entry is put back on the LRU queue for later eviction (or freed immediately
 * if queuing fails). NULL-tolerant; always returns NULL for the
 * `p = client_context_release(s, p)` idiom. */
ClientContext *client_context_release(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        /* Only pinned entries may be released, and pinned entries are never on the LRU queue */
        assert(c->n_ref > 0);
        assert(!c->in_lru);

        c->n_ref--;
        if (c->n_ref > 0)
                return NULL;

        /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
         * right-away */

        if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
                client_context_free(s, c);
        else
                c->in_lru = true;

        return NULL;
}
756 | ||
/* Pins the cache entries for our own process and (outside namespaced mode) for
 * PID 1, so they are always available, e.g. for generating driver messages.
 * Failures are logged (ratelimited) and otherwise ignored. */
void client_context_acquire_default(Server *s) {
        int r;

        assert(s);

        /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
         * generate driver messages. */

        if (!s->my_context) {
                /* We know our own credentials, so pass them in directly rather than
                 * sampling /proc */
                struct ucred ucred = {
                        .pid = getpid_cached(),
                        .uid = getuid(),
                        .gid = getgid(),
                };

                r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
                if (r < 0)
                        log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT,
                                                    "Failed to acquire our own context, ignoring: %m");
        }

        if (!s->namespace && !s->pid1_context) {
                /* Acquire PID1's context, but only if we are in non-namespaced mode, since PID 1 is only
                 * going to log to the non-namespaced journal instance. */

                r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
                if (r < 0)
                        log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT,
                                                    "Failed to acquire PID1's context, ignoring: %m");

        }
}