]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
22e3a02b | 2 | |
349cc4a5 | 3 | #if HAVE_SELINUX |
22e3a02b LP |
4 | #include <selinux/selinux.h> |
5 | #endif | |
6 | ||
7 | #include "alloc-util.h" | |
8 | #include "audit-util.h" | |
9 | #include "cgroup-util.h" | |
d3070fbd LP |
10 | #include "fd-util.h" |
11 | #include "fileio.h" | |
12 | #include "fs-util.h" | |
13 | #include "io-util.h" | |
14 | #include "journal-util.h" | |
22e3a02b | 15 | #include "journald-context.h" |
90fc172e | 16 | #include "parse-util.h" |
672773b6 | 17 | #include "path-util.h" |
22e3a02b | 18 | #include "process-util.h" |
b12a4808 | 19 | #include "procfs-util.h" |
22e3a02b | 20 | #include "string-util.h" |
d3070fbd LP |
21 | #include "syslog-util.h" |
22 | #include "unaligned.h" | |
22e3a02b LP |
23 | #include "user-util.h" |
24 | ||
25 | /* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc | |
26 | * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we | |
27 | * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time the | |
28 | * log entry was originally created. We hence just increase the "window of inaccuracy" a bit. | |
29 | * | |
30 | * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed | |
31 | * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s | |
32 | * are never used (a sad attempt to deal with the UNIX weakness of PIDs reuse), cache entries older than 1s are | |
33 | * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not | |
34 | * flushed out). Data newer than 1s is used immediately without refresh. | |
35 | * | |
36 | * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry | |
37 | * as long as their socket is connected. Note that cache entries are shared between different transports. That means a | |
38 | * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols. | |
39 | * | |
40 | * Caching metadata like this has two major benefits: | |
41 | * | |
42 | * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood. | |
43 | * | |
44 | * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache | |
45 | * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating | |
46 | * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a | |
47 | * stream connection. This should improve cases where a service process logs immediately before exiting and we | |
48 | * previously had trouble associating the log message with the service. | |
49 | * | |
50 | * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of | |
51 | * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older |
52 | * and sometimes slightly newer than what was current at the log event). | |
53 | */ | |
54 | ||
/* We refresh every 1s */
#define REFRESH_USEC (1*USEC_PER_SEC)

/* Data older than 5s we flush out */
#define MAX_USEC (5*USEC_PER_SEC)

/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
 * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
 * clients itself is limited.) */
#define CACHE_MAX_FALLBACK 128U          /* used when total system memory cannot be determined */
#define CACHE_MAX_MAX (16*1024U)         /* hard upper bound regardless of RAM */
#define CACHE_MAX_MIN 64U                /* lower bound even on tiny systems */
67 | ||
68 | static size_t cache_max(void) { | |
69 | static size_t cached = -1; | |
70 | ||
71 | if (cached == (size_t) -1) { | |
72 | uint64_t mem_total; | |
73 | int r; | |
74 | ||
75 | r = procfs_memory_get(&mem_total, NULL); | |
76 | if (r < 0) { | |
77 | log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m"); | |
78 | cached = CACHE_MAX_FALLBACK; | |
79 | } else { | |
80 | /* Cache entries are usually a few kB, but the process cmdline is controlled by the | |
81 | * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to | |
82 | * 1/8th of memory may be used by the cache. | |
83 | * | |
84 | * In the common case, this formula gives 64 cache entries for each GB of RAM. | |
85 | */ | |
86 | long l = sysconf(_SC_ARG_MAX); | |
87 | assert(l > 0); | |
88 | ||
89 | cached = CLAMP(mem_total / 8 / (uint64_t) l, CACHE_MAX_MIN, CACHE_MAX_MAX); | |
90 | } | |
91 | } | |
92 | ||
93 | return cached; | |
94 | } | |
22e3a02b LP |
95 | |
96 | static int client_context_compare(const void *a, const void *b) { | |
97 | const ClientContext *x = a, *y = b; | |
9c57a73b | 98 | int r; |
22e3a02b | 99 | |
9c57a73b YW |
100 | r = CMP(x->timestamp, y->timestamp); |
101 | if (r != 0) | |
102 | return r; | |
22e3a02b | 103 | |
9c57a73b | 104 | return CMP(x->pid, y->pid); |
22e3a02b LP |
105 | } |
106 | ||
107 | static int client_context_new(Server *s, pid_t pid, ClientContext **ret) { | |
108 | ClientContext *c; | |
109 | int r; | |
110 | ||
111 | assert(s); | |
112 | assert(pid_is_valid(pid)); | |
113 | assert(ret); | |
114 | ||
115 | r = hashmap_ensure_allocated(&s->client_contexts, NULL); | |
116 | if (r < 0) | |
117 | return r; | |
118 | ||
119 | r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare); | |
120 | if (r < 0) | |
121 | return r; | |
122 | ||
123 | c = new0(ClientContext, 1); | |
124 | if (!c) | |
125 | return -ENOMEM; | |
126 | ||
127 | c->pid = pid; | |
128 | ||
129 | c->uid = UID_INVALID; | |
130 | c->gid = GID_INVALID; | |
131 | c->auditid = AUDIT_SESSION_INVALID; | |
132 | c->loginuid = UID_INVALID; | |
133 | c->owner_uid = UID_INVALID; | |
134 | c->lru_index = PRIOQ_IDX_NULL; | |
135 | c->timestamp = USEC_INFINITY; | |
d3070fbd LP |
136 | c->extra_fields_mtime = NSEC_INFINITY; |
137 | c->log_level_max = -1; | |
90fc172e AZ |
138 | c->log_rate_limit_interval = s->rate_limit_interval; |
139 | c->log_rate_limit_burst = s->rate_limit_burst; | |
22e3a02b LP |
140 | |
141 | r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c); | |
142 | if (r < 0) { | |
143 | free(c); | |
144 | return r; | |
145 | } | |
146 | ||
147 | *ret = c; | |
148 | return 0; | |
149 | } | |
150 | ||
90fc172e AZ |
/* Return a context to its pristine state: all cached metadata is freed or invalidated, but the
 * entry itself (its PID, refcount and LRU linkage) stays intact. Also used as the cleanup step
 * of the destructor. */
static void client_context_reset(Server *s, ClientContext *c) {
        assert(s);
        assert(c);

        c->timestamp = USEC_INFINITY;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;

        /* Process-level metadata */
        c->comm = mfree(c->comm);
        c->exe = mfree(c->exe);
        c->cmdline = mfree(c->cmdline);
        c->capeff = mfree(c->capeff);

        /* Audit metadata */
        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;

        /* cgroup-derived metadata */
        c->cgroup = mfree(c->cgroup);
        c->session = mfree(c->session);
        c->owner_uid = UID_INVALID;
        c->unit = mfree(c->unit);
        c->user_unit = mfree(c->user_unit);
        c->slice = mfree(c->slice);
        c->user_slice = mfree(c->user_slice);

        c->invocation_id = SD_ID128_NULL;

        /* Security label */
        c->label = mfree(c->label);
        c->label_size = 0;

        /* Per-unit extra journal fields */
        c->extra_fields_iovec = mfree(c->extra_fields_iovec);
        c->extra_fields_n_iovec = 0;
        c->extra_fields_data = mfree(c->extra_fields_data);
        c->extra_fields_mtime = NSEC_INFINITY;

        c->log_level_max = -1;

        /* Rate limit settings reset to the server-wide defaults, not to "unset". */
        c->log_rate_limit_interval = s->rate_limit_interval;
        c->log_rate_limit_burst = s->rate_limit_burst;
}
191 | ||
/* Destructor: unregisters the context from the hashmap (and from the LRU queue if enqueued),
 * releases all cached metadata and frees the entry. Always returns NULL, for use in the
 * "c = client_context_free(s, c)" idiom. */
static ClientContext* client_context_free(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);

        if (c->in_lru)
                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);

        client_context_reset(s, c);

        return mfree(c);
}
207 | ||
/* Fill in the UID/GID fields, preferring kernel-supplied socket credentials over a /proc
 * lookup. The /proc fallbacks are best-effort: on failure the old values are kept. */
static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) {
        assert(c);
        assert(pid_is_valid(c->pid));

        /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */
        if (ucred && uid_is_valid(ucred->uid))
                c->uid = ucred->uid;
        else
                (void) get_process_uid(c->pid, &c->uid);

        if (ucred && gid_is_valid(ucred->gid))
                c->gid = ucred->gid;
        else
                (void) get_process_gid(c->pid, &c->gid);
}
223 | ||
224 | static void client_context_read_basic(ClientContext *c) { | |
225 | char *t; | |
226 | ||
227 | assert(c); | |
228 | assert(pid_is_valid(c->pid)); | |
229 | ||
230 | if (get_process_comm(c->pid, &t) >= 0) | |
231 | free_and_replace(c->comm, t); | |
232 | ||
233 | if (get_process_exe(c->pid, &t) >= 0) | |
234 | free_and_replace(c->exe, t); | |
235 | ||
236 | if (get_process_cmdline(c->pid, 0, false, &t) >= 0) | |
237 | free_and_replace(c->cmdline, t); | |
238 | ||
239 | if (get_process_capeff(c->pid, &t) >= 0) | |
240 | free_and_replace(c->capeff, t); | |
241 | } | |
242 | ||
/* Store the SELinux label for the client. A label passed in by the caller (e.g. received via
 * SCM_SECURITY) takes precedence; otherwise, when built with SELinux support, we query the
 * kernel for the process' context. Returns 0 on success (including when no label could be
 * acquired), -ENOMEM on allocation failure. */
static int client_context_read_label(
                ClientContext *c,
                const char *label, size_t label_size) {

        assert(c);
        assert(pid_is_valid(c->pid));
        assert(label_size == 0 || label);

        if (label_size > 0) {
                char *l;

                /* If we got an SELinux label passed in it counts. */

                l = newdup_suffix0(char, label, label_size);
                if (!l)
                        return -ENOMEM;

                free_and_replace(c->label, l);
                c->label_size = label_size;
        }
#if HAVE_SELINUX
        else {
                char *con;

                /* If we got no SELinux label passed in, let's try to acquire one. Failure is
                 * not an error: we simply keep whatever label (if any) was cached before. */

                if (getpidcon(c->pid, &con) >= 0) {
                        free_and_replace(c->label, con);
                        c->label_size = strlen(c->label);
                }
        }
#endif

        return 0;
}
278 | ||
/* Refresh all cgroup-derived metadata (session, owner UID, unit, user unit, slices) for the
 * client. @unit_id is an optional fallback unit name used when the cgroup path cannot be
 * determined. Returns 0 on success or if the cgroup didn't change; negative errno otherwise. */
static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(c);

        /* Try to acquire the current cgroup path */
        r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
        if (r < 0 || empty_or_root(t)) {
                /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
                 * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
                 * on cgroup v1 and we want to be able to map log messages from them too. */
                if (unit_id && !c->unit) {
                        c->unit = strdup(unit_id);
                        if (c->unit)
                                return 0;
                }

                return r;
        }

        /* Let's shortcut this if the cgroup path didn't change */
        if (streq_ptr(c->cgroup, t))
                return 0;

        /* NOTE: free_and_replace() moves t into the field and resets t to NULL, so t can be
         * reused as the output variable for each of the derived lookups below. Each lookup is
         * best-effort; on failure t stays NULL and the field is cleared. */
        free_and_replace(c->cgroup, t);

        (void) cg_path_get_session(c->cgroup, &t);
        free_and_replace(c->session, t);

        if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
                c->owner_uid = UID_INVALID;

        (void) cg_path_get_unit(c->cgroup, &t);
        free_and_replace(c->unit, t);

        (void) cg_path_get_user_unit(c->cgroup, &t);
        free_and_replace(c->user_unit, t);

        (void) cg_path_get_slice(c->cgroup, &t);
        free_and_replace(c->slice, t);

        (void) cg_path_get_user_slice(c->cgroup, &t);
        free_and_replace(c->user_slice, t);

        return 0;
}
326 | ||
327 | static int client_context_read_invocation_id( | |
328 | Server *s, | |
329 | ClientContext *c) { | |
330 | ||
d3070fbd | 331 | _cleanup_free_ char *value = NULL; |
22e3a02b LP |
332 | const char *p; |
333 | int r; | |
334 | ||
335 | assert(s); | |
336 | assert(c); | |
337 | ||
d3070fbd | 338 | /* Read the invocation ID of a unit off a unit. PID 1 stores it in a per-unit symlink in /run/systemd/units/ */ |
22e3a02b | 339 | |
d3070fbd | 340 | if (!c->unit) |
22e3a02b LP |
341 | return 0; |
342 | ||
d3070fbd LP |
343 | p = strjoina("/run/systemd/units/invocation:", c->unit); |
344 | r = readlink_malloc(p, &value); | |
22e3a02b LP |
345 | if (r < 0) |
346 | return r; | |
347 | ||
d3070fbd LP |
348 | return sd_id128_from_string(value, &c->invocation_id); |
349 | } | |
22e3a02b | 350 | |
d3070fbd LP |
351 | static int client_context_read_log_level_max( |
352 | Server *s, | |
353 | ClientContext *c) { | |
22e3a02b | 354 | |
d3070fbd LP |
355 | _cleanup_free_ char *value = NULL; |
356 | const char *p; | |
357 | int r, ll; | |
358 | ||
359 | if (!c->unit) | |
360 | return 0; | |
361 | ||
362 | p = strjoina("/run/systemd/units/log-level-max:", c->unit); | |
363 | r = readlink_malloc(p, &value); | |
22e3a02b LP |
364 | if (r < 0) |
365 | return r; | |
d3070fbd LP |
366 | |
367 | ll = log_level_from_string(value); | |
368 | if (ll < 0) | |
22e3a02b | 369 | return -EINVAL; |
22e3a02b | 370 | |
d3070fbd LP |
371 | c->log_level_max = ll; |
372 | return 0; | |
373 | } | |
374 | ||
/* Load the per-unit extra journal fields file published by PID 1 under
 * /run/systemd/units/log-extra-fields:<unit>. The file is a sequence of records, each a
 * little-endian 64-bit length followed by that many bytes of "FIELD=value" payload. The
 * parsed iovec array points into the loaded buffer, both of which are cached along with
 * the file's mtime so unchanged files are not re-parsed. Returns 0 on success or if there
 * is nothing to read, -EBADMSG on malformed data, other negative errno on I/O failure. */
static int client_context_read_extra_fields(
                Server *s,
                ClientContext *c) {

        size_t size = 0, n_iovec = 0, n_allocated = 0, left;
        _cleanup_free_ struct iovec *iovec = NULL;
        _cleanup_free_ void *data = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        struct stat st;
        const char *p;
        uint8_t *q;
        int r;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);

        /* If we have cached data already, use the mtime to skip re-reading an unchanged file. */
        if (c->extra_fields_mtime != NSEC_INFINITY) {
                if (stat(p, &st) < 0) {
                        if (errno == ENOENT)
                                return 0;

                        return -errno;
                }

                if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
                        return 0;
        }

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return 0;

                return -errno;
        }

        if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
                                        * one, that matches the stuff we are reading */
                return -errno;

        r = read_full_stream(f, (char**) &data, &size);
        if (r < 0)
                return r;

        /* Walk the buffer record by record, validating as we go. */
        q = data, left = size;
        while (left > 0) {
                uint8_t *field, *eq;
                uint64_t v, n;

                if (left < sizeof(uint64_t))
                        return -EBADMSG;

                v = unaligned_read_le64(q);
                if (v < 2) /* shortest valid payload is a one-char field name plus '=' */
                        return -EBADMSG;

                n = sizeof(uint64_t) + v;
                if (left < n)
                        return -EBADMSG;

                field = q + sizeof(uint64_t);

                eq = memchr(field, '=', v);
                if (!eq)
                        return -EBADMSG;

                if (!journal_field_valid((const char *) field, eq - field, false))
                        return -EBADMSG;

                if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
                        return -ENOMEM;

                /* The iovec points into `data`, which we keep alive alongside it below. */
                iovec[n_iovec++] = IOVEC_MAKE(field, v);

                left -= n, q += n;
        }

        /* Only now that parsing succeeded do we replace the previously cached data. */
        free(c->extra_fields_iovec);
        free(c->extra_fields_data);

        c->extra_fields_iovec = TAKE_PTR(iovec);
        c->extra_fields_n_iovec = n_iovec;
        c->extra_fields_data = TAKE_PTR(data);
        c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);

        return 0;
}
464 | ||
90fc172e AZ |
465 | static int client_context_read_log_rate_limit_interval(ClientContext *c) { |
466 | _cleanup_free_ char *value = NULL; | |
467 | const char *p; | |
468 | int r; | |
469 | ||
470 | assert(c); | |
471 | ||
472 | if (!c->unit) | |
473 | return 0; | |
474 | ||
475 | p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit); | |
476 | r = readlink_malloc(p, &value); | |
477 | if (r < 0) | |
478 | return r; | |
479 | ||
480 | return safe_atou64(value, &c->log_rate_limit_interval); | |
481 | } | |
482 | ||
483 | static int client_context_read_log_rate_limit_burst(ClientContext *c) { | |
484 | _cleanup_free_ char *value = NULL; | |
485 | const char *p; | |
486 | int r; | |
487 | ||
488 | assert(c); | |
489 | ||
490 | if (!c->unit) | |
491 | return 0; | |
492 | ||
493 | p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit); | |
494 | r = readlink_malloc(p, &value); | |
495 | if (r < 0) | |
496 | return r; | |
497 | ||
498 | return safe_atou(value, &c->log_rate_limit_burst); | |
499 | } | |
500 | ||
22e3a02b LP |
/* Unconditionally refresh every piece of cached metadata for the client. All individual
 * lookups are best-effort; whatever cannot be re-read keeps its previous value. Finally the
 * entry's timestamp is updated and, if it sits in the LRU queue, its position is reshuffled. */
static void client_context_really_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);
        assert(pid_is_valid(c->pid));

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        client_context_read_uid_gid(c, ucred);
        client_context_read_basic(c);
        (void) client_context_read_label(c, label, label_size);

        (void) audit_session_from_pid(c->pid, &c->auditid);
        (void) audit_loginuid_from_pid(c->pid, &c->loginuid);

        /* The cgroup lookup must come first, since the per-unit readers below depend on c->unit. */
        (void) client_context_read_cgroup(s, c, unit_id);
        (void) client_context_read_invocation_id(s, c);
        (void) client_context_read_log_level_max(s, c);
        (void) client_context_read_extra_fields(s, c);
        (void) client_context_read_log_rate_limit_interval(c);
        (void) client_context_read_log_rate_limit_burst(c);

        c->timestamp = timestamp;

        if (c->in_lru) {
                assert(c->n_ref == 0);
                assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
        }
}
537 | ||
/* Refresh the cached metadata only when needed: when nothing is cached yet, when the cache
 * has aged past the refresh/expiry thresholds, or when the credentials/label passed along
 * contradict what we have cached (suggesting PID reuse or a credential change). */
void client_context_maybe_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        /* No cached data so far? Let's fill it up */
        if (c->timestamp == USEC_INFINITY)
                goto refresh;

        /* If the data isn't pinned and if the cached data is older than the upper limit, we flush it out
         * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
        if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
                client_context_reset(s, c);
                goto refresh;
        }

        /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
        if (c->timestamp + REFRESH_USEC < timestamp)
                goto refresh;

        /* If the data passed along doesn't match the cached data we also do a refresh */
        if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
                goto refresh;

        if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
                goto refresh;

        if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
                goto refresh;

        return;

refresh:
        client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
}
582 | ||
/* Try to reduce the number of cached entries to @limit: first (rate-limited) drop entries
 * whose PID no longer exists, then evict unpinned entries in LRU order. Pinned entries are
 * never evicted, hence the limit may remain breached. */
static void client_context_try_shrink_to(Server *s, size_t limit) {
        ClientContext *c;
        usec_t t;

        assert(s);

        /* Flush any cache entries for PIDs that have already moved on. Don't do this
         * too often, since it's a slow process. */
        t = now(CLOCK_MONOTONIC);
        if (s->last_cache_pid_flush + MAX_USEC < t) {
                unsigned n = prioq_size(s->client_contexts_lru), idx = 0;

                /* We do a number of iterations based on the initial size of the prioq. When we remove an
                 * item, a new item is moved into its places, and items to the right might be reshuffled.
                 */
                for (unsigned i = 0; i < n; i++) {
                        c = prioq_peek_by_index(s->client_contexts_lru, idx);

                        /* Entries in the LRU queue are by definition unpinned. */
                        assert(c->n_ref == 0);

                        if (!pid_is_unwaited(c->pid))
                                client_context_free(s, c);
                        else
                                idx ++; /* entry kept: advance past it */
                }

                s->last_cache_pid_flush = t;
        }

        /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
         * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
         * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */

        while (hashmap_size(s->client_contexts) > limit) {
                c = prioq_pop(s->client_contexts_lru);
                if (!c)
                        break; /* All remaining entries are pinned, give up */

                assert(c->in_lru);
                assert(c->n_ref == 0);

                c->in_lru = false;

                client_context_free(s, c);
        }
}
629 | ||
/* Tear down the whole cache at shutdown: release the two default pins, evict everything,
 * and free the underlying hashmap and prioq. Assumes all other references are gone. */
void client_context_flush_all(Server *s) {
        assert(s);

        /* Flush out all remaining entries. This assumes all references are already dropped. */

        s->my_context = client_context_release(s, s->my_context);
        s->pid1_context = client_context_release(s, s->pid1_context);

        client_context_try_shrink_to(s, 0);

        assert(prioq_size(s->client_contexts_lru) == 0);
        assert(hashmap_size(s->client_contexts) == 0);

        s->client_contexts_lru = prioq_free(s->client_contexts_lru);
        s->client_contexts = hashmap_free(s->client_contexts);
}
646 | ||
/* Common implementation for client_context_get() and client_context_acquire(): look up the
 * cache entry for @pid, creating it if needed, refresh it, and — when @add_ref is true —
 * pin it (removing it from the LRU queue). Returns 0 on success, -EINVAL for an invalid PID,
 * other negative errno on allocation failure. */
static int client_context_get_internal(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                bool add_ref,
                ClientContext **ret) {

        ClientContext *c;
        int r;

        assert(s);
        assert(ret);

        if (!pid_is_valid(pid))
                return -EINVAL;

        c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
        if (c) {

                if (add_ref) {
                        if (c->in_lru) {
                                /* The entry wasn't pinned so far, let's remove it from the LRU list then */
                                assert(c->n_ref == 0);
                                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
                                c->in_lru = false;
                        }

                        c->n_ref++;
                }

                client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

                *ret = c;
                return 0;
        }

        /* Make room for the new entry before creating it. */
        client_context_try_shrink_to(s, cache_max()-1);

        r = client_context_new(s, pid, &c);
        if (r < 0)
                return r;

        if (add_ref)
                c->n_ref++;
        else {
                /* Unpinned entries live in the LRU queue; if we can't enqueue it, drop it. */
                r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
                if (r < 0) {
                        client_context_free(s, c);
                        return r;
                }

                c->in_lru = true;
        }

        client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

        *ret = c;
        return 0;
}
708 | ||
/* Look up (or create) and refresh the context for @pid without pinning it.
 * See client_context_get_internal() for details. */
int client_context_get(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                ClientContext **ret) {

        return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
}
719 | ||
720 | int client_context_acquire( | |
721 | Server *s, | |
722 | pid_t pid, | |
723 | const struct ucred *ucred, | |
724 | const char *label, size_t label_len, | |
725 | const char *unit_id, | |
726 | ClientContext **ret) { | |
727 | ||
728 | return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret); | |
729 | }; | |
730 | ||
731 | ClientContext *client_context_release(Server *s, ClientContext *c) { | |
732 | assert(s); | |
733 | ||
734 | if (!c) | |
735 | return NULL; | |
736 | ||
737 | assert(c->n_ref > 0); | |
738 | assert(!c->in_lru); | |
739 | ||
740 | c->n_ref--; | |
741 | if (c->n_ref > 0) | |
742 | return NULL; | |
743 | ||
744 | /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it | |
745 | * right-away */ | |
746 | ||
747 | if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0) | |
748 | client_context_free(s, c); | |
749 | else | |
750 | c->in_lru = true; | |
751 | ||
752 | return NULL; | |
753 | } | |
754 | ||
755 | void client_context_acquire_default(Server *s) { | |
756 | int r; | |
757 | ||
758 | assert(s); | |
759 | ||
760 | /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to | |
761 | * generate driver messages. */ | |
762 | ||
763 | if (!s->my_context) { | |
764 | struct ucred ucred = { | |
765 | .pid = getpid_cached(), | |
766 | .uid = getuid(), | |
767 | .gid = getgid(), | |
768 | }; | |
769 | ||
770 | r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context); | |
771 | if (r < 0) | |
772 | log_warning_errno(r, "Failed to acquire our own context, ignoring: %m"); | |
773 | } | |
774 | ||
775 | if (!s->pid1_context) { | |
776 | ||
777 | r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context); | |
778 | if (r < 0) | |
779 | log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m"); | |
780 | ||
781 | } | |
782 | } |