]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
22e3a02b | 2 | |
349cc4a5 | 3 | #if HAVE_SELINUX |
22e3a02b LP |
4 | #include <selinux/selinux.h> |
5 | #endif | |
6 | ||
7 | #include "alloc-util.h" | |
8 | #include "audit-util.h" | |
9 | #include "cgroup-util.h" | |
21c491e1 | 10 | #include "env-util.h" |
d3070fbd LP |
11 | #include "fd-util.h" |
12 | #include "fileio.h" | |
13 | #include "fs-util.h" | |
14 | #include "io-util.h" | |
15 | #include "journal-util.h" | |
22e3a02b | 16 | #include "journald-context.h" |
90fc172e | 17 | #include "parse-util.h" |
672773b6 | 18 | #include "path-util.h" |
22e3a02b | 19 | #include "process-util.h" |
b12a4808 | 20 | #include "procfs-util.h" |
22e3a02b | 21 | #include "string-util.h" |
d3070fbd LP |
22 | #include "syslog-util.h" |
23 | #include "unaligned.h" | |
22e3a02b LP |
24 | #include "user-util.h" |
25 | ||
/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
 * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
 * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time than
 * when the log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
 *
 * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
 * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
 * are never used (a sad attempt to deal with the UNIX weakness of PID reuse), cache entries older than 1s are
 * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
 * flushed out). Data newer than 1s is used immediately without refresh.
 *
 * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
 * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
 * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
 *
 * Caching metadata like this has two major benefits:
 *
 * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
 *
 * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
 *    entry there's a good chance we can properly map a substantial set of datagram log messages to their originating
 *    service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
 *    stream connection. This should improve cases where a service process logs immediately before exiting and we
 *    previously had trouble associating the log message with the service.
 *
 * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of
 *     UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly
 *     older and sometimes slightly newer than what was current at the log event).
 */
55 | ||
/* We refresh every 1s */
#define REFRESH_USEC (1*USEC_PER_SEC)

/* Data older than 5s we flush out */
#define MAX_USEC (5*USEC_PER_SEC)

/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
 * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
 * clients itself is limited.) */
#define CACHE_MAX_FALLBACK 128U       /* used when total RAM cannot be determined */
#define CACHE_MAX_MAX (16*1024U)      /* hard upper bound on cache entries */
#define CACHE_MAX_MIN 64U             /* lower bound, even on tiny systems */
68 | ||
/* Returns the maximum number of cache entries to keep, derived from total system RAM. The value is computed
 * once on first call and memoized in a function-local static; (size_t) -1 marks "not yet determined". Falls
 * back to CACHE_MAX_FALLBACK if /proc/meminfo cannot be queried. */
static size_t cache_max(void) {
        static size_t cached = -1;

        if (cached == (size_t) -1) {
                uint64_t mem_total;
                int r;

                r = procfs_memory_get(&mem_total, NULL);
                if (r < 0) {
                        log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m");
                        cached = CACHE_MAX_FALLBACK;
                } else
                        /* Cache entries are usually a few kB, but the process cmdline is controlled by the
                         * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to
                         * 1/8th of memory may be used by the cache.
                         *
                         * In the common case, this formula gives 64 cache entries for each GB of RAM.
                         */
                        cached = CLAMP(mem_total / 8 / sc_arg_max(), CACHE_MAX_MIN, CACHE_MAX_MAX);
        }

        return cached;
}
22e3a02b LP |
92 | |
93 | static int client_context_compare(const void *a, const void *b) { | |
94 | const ClientContext *x = a, *y = b; | |
9c57a73b | 95 | int r; |
22e3a02b | 96 | |
9c57a73b YW |
97 | r = CMP(x->timestamp, y->timestamp); |
98 | if (r != 0) | |
99 | return r; | |
22e3a02b | 100 | |
9c57a73b | 101 | return CMP(x->pid, y->pid); |
22e3a02b LP |
102 | } |
103 | ||
/* Allocates a fresh ClientContext for the given PID and registers it in the server's context hashmap,
 * returning it in *ret. All metadata fields start out as "invalid"/unset sentinels; they are filled in later
 * by the refresh logic. The entry is NOT added to the LRU prioq here — the caller decides whether to pin it
 * or to insert it into the LRU. Returns 0 on success, negative errno on failure. */
static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
        ClientContext *c;
        int r;

        assert(s);
        assert(pid_is_valid(pid));
        assert(ret);

        /* Make sure both lookup structures exist before we allocate anything */
        r = hashmap_ensure_allocated(&s->client_contexts, NULL);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
        if (r < 0)
                return r;

        c = new(ClientContext, 1);
        if (!c)
                return -ENOMEM;

        *c = (ClientContext) {
                .pid = pid,
                .uid = UID_INVALID,
                .gid = GID_INVALID,
                .auditid = AUDIT_SESSION_INVALID,
                .loginuid = UID_INVALID,
                .owner_uid = UID_INVALID,
                .lru_index = PRIOQ_IDX_NULL,
                .timestamp = USEC_INFINITY,      /* USEC_INFINITY == "no cached data yet" */
                .extra_fields_mtime = NSEC_INFINITY,
                .log_level_max = -1,
                .log_ratelimit_interval = s->ratelimit_interval,
                .log_ratelimit_burst = s->ratelimit_burst,
        };

        r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c);
        if (r < 0) {
                free(c);
                return r;
        }

        *ret = c;
        return 0;
}
148 | ||
/* Frees all cached metadata strings of the entry and resets every field back to its "invalid"/unset sentinel,
 * as used by client_context_new(). The entry itself stays allocated and registered; only its payload is
 * dropped. Used both when flushing a stale entry and as part of client_context_free(). */
static void client_context_reset(Server *s, ClientContext *c) {
        assert(s);
        assert(c);

        c->timestamp = USEC_INFINITY;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;

        c->comm = mfree(c->comm);
        c->exe = mfree(c->exe);
        c->cmdline = mfree(c->cmdline);
        c->capeff = mfree(c->capeff);

        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;

        c->cgroup = mfree(c->cgroup);
        c->session = mfree(c->session);
        c->owner_uid = UID_INVALID;
        c->unit = mfree(c->unit);
        c->user_unit = mfree(c->user_unit);
        c->slice = mfree(c->slice);
        c->user_slice = mfree(c->user_slice);

        c->invocation_id = SD_ID128_NULL;

        c->label = mfree(c->label);
        c->label_size = 0;

        c->extra_fields_iovec = mfree(c->extra_fields_iovec);
        c->extra_fields_n_iovec = 0;
        c->extra_fields_data = mfree(c->extra_fields_data);
        c->extra_fields_mtime = NSEC_INFINITY;

        c->log_level_max = -1;

        /* Rate limit settings fall back to the server-wide defaults */
        c->log_ratelimit_interval = s->ratelimit_interval;
        c->log_ratelimit_burst = s->ratelimit_burst;
}
189 | ||
/* Unregisters the entry from the hashmap (and the LRU prioq, if unpinned), releases all cached metadata and
 * frees the entry itself. Accepts NULL for convenience; always returns NULL so callers can reset their
 * pointer in one statement. */
static ClientContext* client_context_free(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);

        if (c->in_lru)
                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);

        client_context_reset(s, c);

        return mfree(c);
}
205 | ||
206 | static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) { | |
207 | assert(c); | |
208 | assert(pid_is_valid(c->pid)); | |
209 | ||
210 | /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */ | |
211 | if (ucred && uid_is_valid(ucred->uid)) | |
212 | c->uid = ucred->uid; | |
213 | else | |
214 | (void) get_process_uid(c->pid, &c->uid); | |
215 | ||
216 | if (ucred && gid_is_valid(ucred->gid)) | |
217 | c->gid = ucred->gid; | |
218 | else | |
219 | (void) get_process_gid(c->pid, &c->gid); | |
220 | } | |
221 | ||
/* Refreshes the basic /proc-derived process metadata: comm, exe, cmdline and effective capabilities. Each
 * read is best-effort — on failure the previously cached value (if any) is kept, implementing the
 * "incremental refresh" semantics described at the top of this file. */
static void client_context_read_basic(ClientContext *c) {
        char *t;

        assert(c);
        assert(pid_is_valid(c->pid));

        if (get_process_comm(c->pid, &t) >= 0)
                free_and_replace(c->comm, t);

        if (get_process_exe(c->pid, &t) >= 0)
                free_and_replace(c->exe, t);

        if (get_process_cmdline(c->pid, SIZE_MAX, 0, &t) >= 0)
                free_and_replace(c->cmdline, t);

        if (get_process_capeff(c->pid, &t) >= 0)
                free_and_replace(c->capeff, t);
}
240 | ||
/* Stores the client's SELinux label: a label passed in by the caller (e.g. from SCM_SECURITY) takes
 * precedence; otherwise, if compiled with SELinux support, the label is queried from the kernel via
 * getpidcon(), best-effort. Returns 0 on success (including "no label available"), -ENOMEM on allocation
 * failure. */
static int client_context_read_label(
                ClientContext *c,
                const char *label, size_t label_size) {

        assert(c);
        assert(pid_is_valid(c->pid));
        assert(label_size == 0 || label);

        if (label_size > 0) {
                char *l;

                /* If we got an SELinux label passed in it counts. */

                l = newdup_suffix0(char, label, label_size);
                if (!l)
                        return -ENOMEM;

                free_and_replace(c->label, l);
                c->label_size = label_size;
        }
#if HAVE_SELINUX
        else {
                char *con;

                /* If we got no SELinux label passed in, let's try to acquire one */

                if (getpidcon(c->pid, &con) >= 0) {
                        free_and_replace(c->label, con);
                        c->label_size = strlen(c->label);
                }
        }
#endif

        return 0;
}
276 | ||
/* Determines the client's cgroup path and derives session, owner UID, unit, user unit, slice and user slice
 * from it. If the cgroup cannot be determined (or the process sits in the root cgroup), the unit_id passed in
 * by the transport is used as fallback for the unit field. All derivations are best-effort. Returns 0 on
 * success, negative errno otherwise. */
static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(c);

        /* Try to acquire the current cgroup path */
        r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
        if (r < 0 || empty_or_root(t)) {
                /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
                 * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
                 * on cgroup v1 and we want to be able to map log messages from them too. */
                if (unit_id && !c->unit) {
                        c->unit = strdup(unit_id);
                        if (c->unit)
                                return 0;
                }

                return r;
        }

        /* Let's shortcut this if the cgroup path didn't change */
        if (streq_ptr(c->cgroup, t))
                return 0;

        free_and_replace(c->cgroup, t);

        /* Re-derive everything from the new cgroup path; each helper failing simply leaves t NULL, which
         * free_and_replace() then turns into an unset field. */
        (void) cg_path_get_session(c->cgroup, &t);
        free_and_replace(c->session, t);

        if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
                c->owner_uid = UID_INVALID;

        (void) cg_path_get_unit(c->cgroup, &t);
        free_and_replace(c->unit, t);

        (void) cg_path_get_user_unit(c->cgroup, &t);
        free_and_replace(c->user_unit, t);

        (void) cg_path_get_slice(c->cgroup, &t);
        free_and_replace(c->slice, t);

        (void) cg_path_get_user_slice(c->cgroup, &t);
        free_and_replace(c->user_slice, t);

        return 0;
}
324 | ||
325 | static int client_context_read_invocation_id( | |
326 | Server *s, | |
327 | ClientContext *c) { | |
328 | ||
2f8c48b6 | 329 | _cleanup_free_ char *p = NULL, *value = NULL; |
22e3a02b LP |
330 | int r; |
331 | ||
332 | assert(s); | |
333 | assert(c); | |
334 | ||
2f8c48b6 AZ |
335 | /* Read the invocation ID of a unit off a unit. |
336 | * PID 1 stores it in a per-unit symlink in /run/systemd/units/ | |
337 | * User managers store it in a per-unit symlink under /run/user/<uid>/systemd/units/ */ | |
22e3a02b | 338 | |
d3070fbd | 339 | if (!c->unit) |
22e3a02b LP |
340 | return 0; |
341 | ||
2f8c48b6 AZ |
342 | if (c->user_unit) { |
343 | r = asprintf(&p, "/run/user/" UID_FMT "/systemd/units/invocation:%s", c->owner_uid, c->user_unit); | |
344 | if (r < 0) | |
345 | return r; | |
346 | } else { | |
347 | p = strjoin("/run/systemd/units/invocation:", c->unit); | |
348 | if (!p) | |
349 | return -ENOMEM; | |
350 | } | |
351 | ||
d3070fbd | 352 | r = readlink_malloc(p, &value); |
22e3a02b LP |
353 | if (r < 0) |
354 | return r; | |
355 | ||
d3070fbd LP |
356 | return sd_id128_from_string(value, &c->invocation_id); |
357 | } | |
22e3a02b | 358 | |
/* Reads the per-unit LogLevelMax= setting, which PID 1 publishes as a symlink under /run/systemd/units/.
 * Returns 0 if the client has no unit, -EINVAL if the link content is not a valid log level, negative errno
 * on read failure. */
static int client_context_read_log_level_max(
                Server *s,
                ClientContext *c) {

        _cleanup_free_ char *value = NULL;
        const char *p;
        int r, ll;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-level-max:", c->unit);
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        ll = log_level_from_string(value);
        if (ll < 0)
                return -EINVAL;

        c->log_level_max = ll;
        return 0;
}
382 | ||
383 | static int client_context_read_extra_fields( | |
384 | Server *s, | |
385 | ClientContext *c) { | |
386 | ||
387 | size_t size = 0, n_iovec = 0, n_allocated = 0, left; | |
388 | _cleanup_free_ struct iovec *iovec = NULL; | |
389 | _cleanup_free_ void *data = NULL; | |
390 | _cleanup_fclose_ FILE *f = NULL; | |
391 | struct stat st; | |
392 | const char *p; | |
393 | uint8_t *q; | |
394 | int r; | |
395 | ||
396 | if (!c->unit) | |
397 | return 0; | |
398 | ||
399 | p = strjoina("/run/systemd/units/log-extra-fields:", c->unit); | |
400 | ||
401 | if (c->extra_fields_mtime != NSEC_INFINITY) { | |
402 | if (stat(p, &st) < 0) { | |
403 | if (errno == ENOENT) | |
404 | return 0; | |
405 | ||
406 | return -errno; | |
407 | } | |
408 | ||
409 | if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime) | |
410 | return 0; | |
411 | } | |
412 | ||
413 | f = fopen(p, "re"); | |
414 | if (!f) { | |
415 | if (errno == ENOENT) | |
416 | return 0; | |
417 | ||
418 | return -errno; | |
419 | } | |
420 | ||
421 | if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new | |
422 | * one, that matches the stuff we are reading */ | |
423 | return -errno; | |
424 | ||
425 | r = read_full_stream(f, (char**) &data, &size); | |
426 | if (r < 0) | |
427 | return r; | |
428 | ||
429 | q = data, left = size; | |
430 | while (left > 0) { | |
431 | uint8_t *field, *eq; | |
432 | uint64_t v, n; | |
433 | ||
434 | if (left < sizeof(uint64_t)) | |
435 | return -EBADMSG; | |
436 | ||
437 | v = unaligned_read_le64(q); | |
438 | if (v < 2) | |
439 | return -EBADMSG; | |
440 | ||
441 | n = sizeof(uint64_t) + v; | |
442 | if (left < n) | |
443 | return -EBADMSG; | |
444 | ||
445 | field = q + sizeof(uint64_t); | |
446 | ||
447 | eq = memchr(field, '=', v); | |
448 | if (!eq) | |
449 | return -EBADMSG; | |
450 | ||
451 | if (!journal_field_valid((const char *) field, eq - field, false)) | |
452 | return -EBADMSG; | |
453 | ||
454 | if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1)) | |
455 | return -ENOMEM; | |
456 | ||
457 | iovec[n_iovec++] = IOVEC_MAKE(field, v); | |
458 | ||
459 | left -= n, q += n; | |
460 | } | |
461 | ||
462 | free(c->extra_fields_iovec); | |
463 | free(c->extra_fields_data); | |
464 | ||
1cc6c93a | 465 | c->extra_fields_iovec = TAKE_PTR(iovec); |
d3070fbd | 466 | c->extra_fields_n_iovec = n_iovec; |
1cc6c93a | 467 | c->extra_fields_data = TAKE_PTR(data); |
d3070fbd LP |
468 | c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim); |
469 | ||
d3070fbd | 470 | return 0; |
22e3a02b LP |
471 | } |
472 | ||
5ac1530e | 473 | static int client_context_read_log_ratelimit_interval(ClientContext *c) { |
90fc172e AZ |
474 | _cleanup_free_ char *value = NULL; |
475 | const char *p; | |
476 | int r; | |
477 | ||
478 | assert(c); | |
479 | ||
480 | if (!c->unit) | |
481 | return 0; | |
482 | ||
483 | p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit); | |
484 | r = readlink_malloc(p, &value); | |
485 | if (r < 0) | |
486 | return r; | |
487 | ||
5ac1530e | 488 | return safe_atou64(value, &c->log_ratelimit_interval); |
90fc172e AZ |
489 | } |
490 | ||
5ac1530e | 491 | static int client_context_read_log_ratelimit_burst(ClientContext *c) { |
90fc172e AZ |
492 | _cleanup_free_ char *value = NULL; |
493 | const char *p; | |
494 | int r; | |
495 | ||
496 | assert(c); | |
497 | ||
498 | if (!c->unit) | |
499 | return 0; | |
500 | ||
501 | p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit); | |
502 | r = readlink_malloc(p, &value); | |
503 | if (r < 0) | |
504 | return r; | |
505 | ||
5ac1530e | 506 | return safe_atou(value, &c->log_ratelimit_burst); |
90fc172e AZ |
507 | } |
508 | ||
/* Unconditionally re-reads all metadata for the entry: UID/GID, basic /proc data, SELinux label, audit
 * session/loginuid, cgroup-derived fields and all per-unit settings. Individual reads are best-effort; old
 * values survive where a re-read fails. Updates the entry's timestamp and, if the entry sits in the LRU
 * prioq, reshuffles it accordingly. */
static void client_context_really_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);
        assert(pid_is_valid(c->pid));

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        client_context_read_uid_gid(c, ucred);
        client_context_read_basic(c);
        (void) client_context_read_label(c, label, label_size);

        (void) audit_session_from_pid(c->pid, &c->auditid);
        (void) audit_loginuid_from_pid(c->pid, &c->loginuid);

        (void) client_context_read_cgroup(s, c, unit_id);
        (void) client_context_read_invocation_id(s, c);
        (void) client_context_read_log_level_max(s, c);
        (void) client_context_read_extra_fields(s, c);
        (void) client_context_read_log_ratelimit_interval(c);
        (void) client_context_read_log_ratelimit_burst(c);

        c->timestamp = timestamp;

        /* The timestamp is the prioq sort key, hence reposition the entry if it is in the LRU */
        if (c->in_lru) {
                assert(c->n_ref == 0);
                assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
        }
}
545 | ||
/* Refreshes the entry's metadata if — and only if — the cached data is missing, too old, or contradicted by
 * the (authoritative) credentials/label passed in. Implements the 1s/5s staleness policy documented at the
 * top of this file. */
void client_context_maybe_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        /* No cached data so far? Let's fill it up */
        if (c->timestamp == USEC_INFINITY)
                goto refresh;

        /* If the data isn't pinned and if the cached data is older than the upper limit, we flush it out
         * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
        if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
                client_context_reset(s, c);
                goto refresh;
        }

        /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
        if (c->timestamp + REFRESH_USEC < timestamp)
                goto refresh;

        /* If the data passed along doesn't match the cached data we also do a refresh */
        if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
                goto refresh;

        if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
                goto refresh;

        if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
                goto refresh;

        return;

refresh:
        client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
}
590 | ||
/* Evicts unpinned cache entries until at most 'limit' entries remain. First (rate-limited to once per
 * MAX_USEC) drops entries whose PID no longer exists, then pops entries off the LRU prioq oldest-first.
 * Pinned entries are never evicted, so the limit may remain breached if everything is pinned. */
static void client_context_try_shrink_to(Server *s, size_t limit) {
        ClientContext *c;
        usec_t t;

        assert(s);

        /* Flush any cache entries for PIDs that have already moved on. Don't do this
         * too often, since it's a slow process. */
        t = now(CLOCK_MONOTONIC);
        if (s->last_cache_pid_flush + MAX_USEC < t) {
                unsigned n = prioq_size(s->client_contexts_lru), idx = 0;

                /* We do a number of iterations based on the initial size of the prioq. When we remove an
                 * item, a new item is moved into its places, and items to the right might be reshuffled.
                 */
                for (unsigned i = 0; i < n; i++) {
                        /* idx only advances on items we keep, so idx stays below the (shrinking) prioq size
                         * as long as i < n, and the peek cannot return NULL here. */
                        c = prioq_peek_by_index(s->client_contexts_lru, idx);

                        assert(c->n_ref == 0);

                        if (!pid_is_unwaited(c->pid))
                                client_context_free(s, c);
                        else
                                idx ++;
                }

                s->last_cache_pid_flush = t;
        }

        /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
         * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
         * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */

        while (hashmap_size(s->client_contexts) > limit) {
                c = prioq_pop(s->client_contexts_lru);
                if (!c)
                        break; /* All remaining entries are pinned, give up */

                assert(c->in_lru);
                assert(c->n_ref == 0);

                c->in_lru = false;

                client_context_free(s, c);
        }
}
637 | ||
/* Tears down the entire context cache, including the two default pinned entries, and frees the lookup
 * structures. Assumes all other references have already been dropped; the asserts verify that shrinking to
 * zero actually emptied both structures. Called on server shutdown. */
void client_context_flush_all(Server *s) {
        assert(s);

        /* Flush out all remaining entries. This assumes all references are already dropped. */

        s->my_context = client_context_release(s, s->my_context);
        s->pid1_context = client_context_release(s, s->pid1_context);

        client_context_try_shrink_to(s, 0);

        assert(prioq_size(s->client_contexts_lru) == 0);
        assert(hashmap_size(s->client_contexts) == 0);

        s->client_contexts_lru = prioq_free(s->client_contexts_lru);
        s->client_contexts = hashmap_free(s->client_contexts);
}
654 | ||
/* Common implementation behind client_context_get() and client_context_acquire(): looks up (or creates) the
 * cache entry for the PID, refreshing its metadata as needed. With add_ref=true the entry is pinned (its
 * reference counter bumped and the entry taken off the LRU); with add_ref=false it stays subject to LRU
 * eviction. Returns 0 and the entry in *ret on success, negative errno on failure. */
static int client_context_get_internal(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                bool add_ref,
                ClientContext **ret) {

        ClientContext *c;
        int r;

        assert(s);
        assert(ret);

        if (!pid_is_valid(pid))
                return -EINVAL;

        c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
        if (c) {

                if (add_ref) {
                        if (c->in_lru) {
                                /* The entry wasn't pinned so far, let's remove it from the LRU list then */
                                assert(c->n_ref == 0);
                                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
                                c->in_lru = false;
                        }

                        c->n_ref++;
                }

                client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

                *ret = c;
                return 0;
        }

        /* Not cached yet — make room for one more entry before creating it */
        client_context_try_shrink_to(s, cache_max()-1);

        r = client_context_new(s, pid, &c);
        if (r < 0)
                return r;

        if (add_ref)
                c->n_ref++;
        else {
                r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
                if (r < 0) {
                        client_context_free(s, c);
                        return r;
                }

                c->in_lru = true;
        }

        /* Fresh entry, hence fill in all metadata unconditionally */
        client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

        *ret = c;
        return 0;
}
716 | ||
717 | int client_context_get( | |
718 | Server *s, | |
719 | pid_t pid, | |
720 | const struct ucred *ucred, | |
721 | const char *label, size_t label_len, | |
722 | const char *unit_id, | |
723 | ClientContext **ret) { | |
724 | ||
725 | return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret); | |
726 | } | |
727 | ||
728 | int client_context_acquire( | |
729 | Server *s, | |
730 | pid_t pid, | |
731 | const struct ucred *ucred, | |
732 | const char *label, size_t label_len, | |
733 | const char *unit_id, | |
734 | ClientContext **ret) { | |
735 | ||
736 | return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret); | |
737 | }; | |
738 | ||
/* Drops one pin reference from the entry. When the last reference goes away the entry is handed over to the
 * LRU prioq (or freed outright if that fails). Accepts NULL and always returns NULL, so callers can write
 * 'ptr = client_context_release(s, ptr);'. */
ClientContext *client_context_release(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        assert(c->n_ref > 0);
        assert(!c->in_lru);

        c->n_ref--;
        if (c->n_ref > 0)
                return NULL;

        /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
         * right-away */

        if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
                client_context_free(s, c);
        else
                c->in_lru = true;

        return NULL;
}
762 | ||
/* Pins the cache entries for journald's own process and (in non-namespaced mode) for PID 1, so they are
 * always available. Failures are logged and ignored — the server works without them, just with poorer
 * metadata. */
void client_context_acquire_default(Server *s) {
        int r;

        assert(s);

        /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
         * generate driver messages. */

        if (!s->my_context) {
                struct ucred ucred = {
                        .pid = getpid_cached(),
                        .uid = getuid(),
                        .gid = getgid(),
                };

                r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
                if (r < 0)
                        log_warning_errno(r, "Failed to acquire our own context, ignoring: %m");
        }

        if (!s->namespace && !s->pid1_context) {
                /* Acquire PID1's context, but only if we are in non-namespaced mode, since PID 1 is only
                 * going to log to the non-namespaced journal instance. */

                r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
                if (r < 0)
                        log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m");

        }
}