]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
22e3a02b | 2 | |
349cc4a5 | 3 | #if HAVE_SELINUX |
22e3a02b LP |
4 | #include <selinux/selinux.h> |
5 | #endif | |
6 | ||
7 | #include "alloc-util.h" | |
8 | #include "audit-util.h" | |
9 | #include "cgroup-util.h" | |
21c491e1 | 10 | #include "env-util.h" |
d3070fbd LP |
11 | #include "fd-util.h" |
12 | #include "fileio.h" | |
13 | #include "fs-util.h" | |
14 | #include "io-util.h" | |
15 | #include "journal-util.h" | |
22e3a02b | 16 | #include "journald-context.h" |
90fc172e | 17 | #include "parse-util.h" |
672773b6 | 18 | #include "path-util.h" |
22e3a02b | 19 | #include "process-util.h" |
b12a4808 | 20 | #include "procfs-util.h" |
22e3a02b | 21 | #include "string-util.h" |
d3070fbd LP |
22 | #include "syslog-util.h" |
23 | #include "unaligned.h" | |
22e3a02b LP |
24 | #include "user-util.h" |
25 | ||
/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
 * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
 * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time than
 * when the log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
 *
 * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
 * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
 * are never used (a sad attempt to deal with the UNIX weakness of PID reuse), cache entries older than 1s are
 * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
 * flushed out). Data newer than 1s is used immediately without refresh.
 *
 * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
 * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
 * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
 *
 * Caching metadata like this has two major benefits:
 *
 * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
 *
 * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
 *    entry there's a good chance we can properly map a substantial set of datagram log messages to their originating
 *    service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
 *    stream connection. This should improve cases where a service process logs immediately before exiting and we
 *    previously had trouble associating the log message with the service.
 *
 * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of
 *     UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older
 *     and sometimes slightly newer than what was current at the log event).
 */
55 | ||
56 | /* We refresh every 1s */ | |
57 | #define REFRESH_USEC (1*USEC_PER_SEC) | |
58 | ||
59 | /* Data older than 5s we flush out */ | |
60 | #define MAX_USEC (5*USEC_PER_SEC) | |
61 | ||
62 | /* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in | |
63 | * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream | |
64 | * clients itself is limited.) */ | |
b12a4808 ZJS |
65 | #define CACHE_MAX_FALLBACK 128U |
66 | #define CACHE_MAX_MAX (16*1024U) | |
67 | #define CACHE_MAX_MIN 64U | |
68 | ||
69 | static size_t cache_max(void) { | |
70 | static size_t cached = -1; | |
71 | ||
72 | if (cached == (size_t) -1) { | |
73 | uint64_t mem_total; | |
74 | int r; | |
75 | ||
76 | r = procfs_memory_get(&mem_total, NULL); | |
77 | if (r < 0) { | |
78 | log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m"); | |
79 | cached = CACHE_MAX_FALLBACK; | |
21c491e1 | 80 | } else |
b12a4808 ZJS |
81 | /* Cache entries are usually a few kB, but the process cmdline is controlled by the |
82 | * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to | |
83 | * 1/8th of memory may be used by the cache. | |
84 | * | |
85 | * In the common case, this formula gives 64 cache entries for each GB of RAM. | |
86 | */ | |
21c491e1 | 87 | cached = CLAMP(mem_total / 8 / sc_arg_max(), CACHE_MAX_MIN, CACHE_MAX_MAX); |
b12a4808 ZJS |
88 | } |
89 | ||
90 | return cached; | |
91 | } | |
22e3a02b LP |
92 | |
93 | static int client_context_compare(const void *a, const void *b) { | |
94 | const ClientContext *x = a, *y = b; | |
9c57a73b | 95 | int r; |
22e3a02b | 96 | |
9c57a73b YW |
97 | r = CMP(x->timestamp, y->timestamp); |
98 | if (r != 0) | |
99 | return r; | |
22e3a02b | 100 | |
9c57a73b | 101 | return CMP(x->pid, y->pid); |
22e3a02b LP |
102 | } |
103 | ||
104 | static int client_context_new(Server *s, pid_t pid, ClientContext **ret) { | |
105 | ClientContext *c; | |
106 | int r; | |
107 | ||
108 | assert(s); | |
109 | assert(pid_is_valid(pid)); | |
110 | assert(ret); | |
111 | ||
112 | r = hashmap_ensure_allocated(&s->client_contexts, NULL); | |
113 | if (r < 0) | |
114 | return r; | |
115 | ||
116 | r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare); | |
117 | if (r < 0) | |
118 | return r; | |
119 | ||
120 | c = new0(ClientContext, 1); | |
121 | if (!c) | |
122 | return -ENOMEM; | |
123 | ||
124 | c->pid = pid; | |
125 | ||
126 | c->uid = UID_INVALID; | |
127 | c->gid = GID_INVALID; | |
128 | c->auditid = AUDIT_SESSION_INVALID; | |
129 | c->loginuid = UID_INVALID; | |
130 | c->owner_uid = UID_INVALID; | |
131 | c->lru_index = PRIOQ_IDX_NULL; | |
132 | c->timestamp = USEC_INFINITY; | |
d3070fbd LP |
133 | c->extra_fields_mtime = NSEC_INFINITY; |
134 | c->log_level_max = -1; | |
90fc172e AZ |
135 | c->log_rate_limit_interval = s->rate_limit_interval; |
136 | c->log_rate_limit_burst = s->rate_limit_burst; | |
22e3a02b LP |
137 | |
138 | r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c); | |
139 | if (r < 0) { | |
140 | free(c); | |
141 | return r; | |
142 | } | |
143 | ||
144 | *ret = c; | |
145 | return 0; | |
146 | } | |
147 | ||
90fc172e AZ |
/* Return the context to its pristine "nothing known" state: frees all dynamically allocated
 * metadata and resets every field to its invalid/unset marker. The entry itself (pid, refcount,
 * LRU linkage) is left intact, so it can be refilled later. */
static void client_context_reset(Server *s, ClientContext *c) {
        assert(s);
        assert(c);

        c->timestamp = USEC_INFINITY;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;

        /* /proc-derived basics */
        c->comm = mfree(c->comm);
        c->exe = mfree(c->exe);
        c->cmdline = mfree(c->cmdline);
        c->capeff = mfree(c->capeff);

        /* audit data */
        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;

        /* cgroup-derived data */
        c->cgroup = mfree(c->cgroup);
        c->session = mfree(c->session);
        c->owner_uid = UID_INVALID;
        c->unit = mfree(c->unit);
        c->user_unit = mfree(c->user_unit);
        c->slice = mfree(c->slice);
        c->user_slice = mfree(c->user_slice);

        c->invocation_id = SD_ID128_NULL;

        /* SELinux label */
        c->label = mfree(c->label);
        c->label_size = 0;

        /* per-unit extra journal fields */
        c->extra_fields_iovec = mfree(c->extra_fields_iovec);
        c->extra_fields_n_iovec = 0;
        c->extra_fields_data = mfree(c->extra_fields_data);
        c->extra_fields_mtime = NSEC_INFINITY;

        c->log_level_max = -1;

        /* Rate limit settings fall back to the server-wide defaults */
        c->log_rate_limit_interval = s->rate_limit_interval;
        c->log_rate_limit_burst = s->rate_limit_burst;
}
188 | ||
/* Remove the context from all data structures and free it, including all cached metadata.
 * Accepts NULL as a no-op; returns NULL for convenient assignment. */
static ClientContext* client_context_free(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        /* Every live context is registered in the PID hashmap; removal must succeed */
        assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);

        /* Only unpinned entries sit in the LRU queue */
        if (c->in_lru)
                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);

        /* Free all dynamically allocated metadata before freeing the struct itself */
        client_context_reset(s, c);

        return mfree(c);
}
204 | ||
205 | static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) { | |
206 | assert(c); | |
207 | assert(pid_is_valid(c->pid)); | |
208 | ||
209 | /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */ | |
210 | if (ucred && uid_is_valid(ucred->uid)) | |
211 | c->uid = ucred->uid; | |
212 | else | |
213 | (void) get_process_uid(c->pid, &c->uid); | |
214 | ||
215 | if (ucred && gid_is_valid(ucred->gid)) | |
216 | c->gid = ucred->gid; | |
217 | else | |
218 | (void) get_process_gid(c->pid, &c->gid); | |
219 | } | |
220 | ||
221 | static void client_context_read_basic(ClientContext *c) { | |
222 | char *t; | |
223 | ||
224 | assert(c); | |
225 | assert(pid_is_valid(c->pid)); | |
226 | ||
227 | if (get_process_comm(c->pid, &t) >= 0) | |
228 | free_and_replace(c->comm, t); | |
229 | ||
230 | if (get_process_exe(c->pid, &t) >= 0) | |
231 | free_and_replace(c->exe, t); | |
232 | ||
bc28751e | 233 | if (get_process_cmdline(c->pid, SIZE_MAX, false, &t) >= 0) |
22e3a02b LP |
234 | free_and_replace(c->cmdline, t); |
235 | ||
236 | if (get_process_capeff(c->pid, &t) >= 0) | |
237 | free_and_replace(c->capeff, t); | |
238 | } | |
239 | ||
240 | static int client_context_read_label( | |
241 | ClientContext *c, | |
242 | const char *label, size_t label_size) { | |
243 | ||
244 | assert(c); | |
245 | assert(pid_is_valid(c->pid)); | |
246 | assert(label_size == 0 || label); | |
247 | ||
248 | if (label_size > 0) { | |
249 | char *l; | |
250 | ||
251 | /* If we got an SELinux label passed in it counts. */ | |
252 | ||
253 | l = newdup_suffix0(char, label, label_size); | |
254 | if (!l) | |
255 | return -ENOMEM; | |
256 | ||
257 | free_and_replace(c->label, l); | |
258 | c->label_size = label_size; | |
259 | } | |
349cc4a5 | 260 | #if HAVE_SELINUX |
22e3a02b LP |
261 | else { |
262 | char *con; | |
263 | ||
264 | /* If we got no SELinux label passed in, let's try to acquire one */ | |
265 | ||
266 | if (getpidcon(c->pid, &con) >= 0) { | |
267 | free_and_replace(c->label, con); | |
268 | c->label_size = strlen(c->label); | |
269 | } | |
270 | } | |
271 | #endif | |
272 | ||
273 | return 0; | |
274 | } | |
275 | ||
/* Refresh the cgroup-derived metadata of the context: cgroup path, session, owner UID, unit,
 * user unit, slice and user slice. 'unit_id' serves as a fallback unit name when the cgroup
 * path is unavailable or is the root cgroup. Returns 0 on success or if the cached path is
 * already current, negative errno otherwise. */
static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(c);

        /* Try to acquire the current cgroup path */
        r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
        if (r < 0 || empty_or_root(t)) {
                /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
                 * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
                 * on cgroup v1 and we want to be able to map log messages from them too. */
                if (unit_id && !c->unit) {
                        c->unit = strdup(unit_id);
                        if (c->unit)
                                return 0;
                }

                return r;
        }

        /* Let's shortcut this if the cgroup path didn't change */
        if (streq_ptr(c->cgroup, t))
                return 0;

        free_and_replace(c->cgroup, t);

        /* Re-derive all secondary fields from the new cgroup path. Each cg_path_get_*() call
         * is best-effort; free_and_replace() installs the result (or NULL on failure) and
         * clears 't' again for the next call. */
        (void) cg_path_get_session(c->cgroup, &t);
        free_and_replace(c->session, t);

        if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
                c->owner_uid = UID_INVALID;

        (void) cg_path_get_unit(c->cgroup, &t);
        free_and_replace(c->unit, t);

        (void) cg_path_get_user_unit(c->cgroup, &t);
        free_and_replace(c->user_unit, t);

        (void) cg_path_get_slice(c->cgroup, &t);
        free_and_replace(c->slice, t);

        (void) cg_path_get_user_slice(c->cgroup, &t);
        free_and_replace(c->user_slice, t);

        return 0;
}
323 | ||
324 | static int client_context_read_invocation_id( | |
325 | Server *s, | |
326 | ClientContext *c) { | |
327 | ||
d3070fbd | 328 | _cleanup_free_ char *value = NULL; |
22e3a02b LP |
329 | const char *p; |
330 | int r; | |
331 | ||
332 | assert(s); | |
333 | assert(c); | |
334 | ||
d3070fbd | 335 | /* Read the invocation ID of a unit off a unit. PID 1 stores it in a per-unit symlink in /run/systemd/units/ */ |
22e3a02b | 336 | |
d3070fbd | 337 | if (!c->unit) |
22e3a02b LP |
338 | return 0; |
339 | ||
d3070fbd LP |
340 | p = strjoina("/run/systemd/units/invocation:", c->unit); |
341 | r = readlink_malloc(p, &value); | |
22e3a02b LP |
342 | if (r < 0) |
343 | return r; | |
344 | ||
d3070fbd LP |
345 | return sd_id128_from_string(value, &c->invocation_id); |
346 | } | |
22e3a02b | 347 | |
d3070fbd LP |
348 | static int client_context_read_log_level_max( |
349 | Server *s, | |
350 | ClientContext *c) { | |
22e3a02b | 351 | |
d3070fbd LP |
352 | _cleanup_free_ char *value = NULL; |
353 | const char *p; | |
354 | int r, ll; | |
355 | ||
356 | if (!c->unit) | |
357 | return 0; | |
358 | ||
359 | p = strjoina("/run/systemd/units/log-level-max:", c->unit); | |
360 | r = readlink_malloc(p, &value); | |
22e3a02b LP |
361 | if (r < 0) |
362 | return r; | |
d3070fbd LP |
363 | |
364 | ll = log_level_from_string(value); | |
365 | if (ll < 0) | |
22e3a02b | 366 | return -EINVAL; |
22e3a02b | 367 | |
d3070fbd LP |
368 | c->log_level_max = ll; |
369 | return 0; | |
370 | } | |
371 | ||
/* Load the serialized extra journal fields PID 1 writes for the unit to
 * /run/systemd/units/log-extra-fields:<unit>. The file is a sequence of records, each a
 * little-endian 64-bit payload length followed by that many bytes of "FIELD=value" data.
 * On success the parsed iovec array and its backing buffer replace any previously stored
 * set; the file's mtime is remembered so unchanged files are not re-parsed. */
static int client_context_read_extra_fields(
                Server *s,
                ClientContext *c) {

        size_t size = 0, n_iovec = 0, n_allocated = 0, left;
        _cleanup_free_ struct iovec *iovec = NULL;
        _cleanup_free_ void *data = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        struct stat st;
        const char *p;
        uint8_t *q;
        int r;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);

        /* If we parsed this file before, skip re-reading when the mtime is unchanged */
        if (c->extra_fields_mtime != NSEC_INFINITY) {
                if (stat(p, &st) < 0) {
                        if (errno == ENOENT)
                                return 0;

                        return -errno;
                }

                if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
                        return 0;
        }

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return 0;

                return -errno;
        }

        if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
                                        * one, that matches the stuff we are reading */
                return -errno;

        r = read_full_stream(f, (char**) &data, &size);
        if (r < 0)
                return r;

        /* Walk the buffer record by record, validating as we go; any malformed record
         * aborts the whole parse without touching the previously stored fields. */
        q = data, left = size;
        while (left > 0) {
                uint8_t *field, *eq;
                uint64_t v, n;

                if (left < sizeof(uint64_t))
                        return -EBADMSG;

                v = unaligned_read_le64(q);
                if (v < 2) /* at least one name character plus '=' */
                        return -EBADMSG;

                n = sizeof(uint64_t) + v;
                if (left < n)
                        return -EBADMSG;

                field = q + sizeof(uint64_t);

                eq = memchr(field, '=', v);
                if (!eq)
                        return -EBADMSG;

                if (!journal_field_valid((const char *) field, eq - field, false))
                        return -EBADMSG;

                if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
                        return -ENOMEM;

                /* The iovec points into 'data', whose ownership is transferred below */
                iovec[n_iovec++] = IOVEC_MAKE(field, v);

                left -= n, q += n;
        }

        /* Commit: replace the previous field set wholesale */
        free(c->extra_fields_iovec);
        free(c->extra_fields_data);

        c->extra_fields_iovec = TAKE_PTR(iovec);
        c->extra_fields_n_iovec = n_iovec;
        c->extra_fields_data = TAKE_PTR(data);
        c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);

        return 0;
}
461 | ||
90fc172e AZ |
462 | static int client_context_read_log_rate_limit_interval(ClientContext *c) { |
463 | _cleanup_free_ char *value = NULL; | |
464 | const char *p; | |
465 | int r; | |
466 | ||
467 | assert(c); | |
468 | ||
469 | if (!c->unit) | |
470 | return 0; | |
471 | ||
472 | p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit); | |
473 | r = readlink_malloc(p, &value); | |
474 | if (r < 0) | |
475 | return r; | |
476 | ||
477 | return safe_atou64(value, &c->log_rate_limit_interval); | |
478 | } | |
479 | ||
480 | static int client_context_read_log_rate_limit_burst(ClientContext *c) { | |
481 | _cleanup_free_ char *value = NULL; | |
482 | const char *p; | |
483 | int r; | |
484 | ||
485 | assert(c); | |
486 | ||
487 | if (!c->unit) | |
488 | return 0; | |
489 | ||
490 | p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit); | |
491 | r = readlink_malloc(p, &value); | |
492 | if (r < 0) | |
493 | return r; | |
494 | ||
495 | return safe_atou(value, &c->log_rate_limit_burst); | |
496 | } | |
497 | ||
22e3a02b LP |
/* Unconditionally (re)read all metadata for the context from the supplied credentials and
 * /proc. Readers are best-effort (hence the (void) casts): data we fail to refresh keeps its
 * previous value rather than being flushed. Finally bumps the context's timestamp, and
 * reshuffles the entry within the LRU queue if it is unpinned (the queue orders by timestamp). */
static void client_context_really_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);
        assert(pid_is_valid(c->pid));

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        client_context_read_uid_gid(c, ucred);
        client_context_read_basic(c);
        (void) client_context_read_label(c, label, label_size);

        (void) audit_session_from_pid(c->pid, &c->auditid);
        (void) audit_loginuid_from_pid(c->pid, &c->loginuid);

        /* The cgroup read may establish c->unit, which the following readers depend on */
        (void) client_context_read_cgroup(s, c, unit_id);
        (void) client_context_read_invocation_id(s, c);
        (void) client_context_read_log_level_max(s, c);
        (void) client_context_read_extra_fields(s, c);
        (void) client_context_read_log_rate_limit_interval(c);
        (void) client_context_read_log_rate_limit_burst(c);

        c->timestamp = timestamp;

        if (c->in_lru) {
                /* Pinned entries are never in the LRU queue, hence the refcount must be zero here */
                assert(c->n_ref == 0);
                assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
        }
}
534 | ||
/* Refresh the cached metadata for the context if it is stale or contradicts the authoritative
 * data passed in with the message; a cheap no-op when the cache entry is recent and consistent. */
void client_context_maybe_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        /* No cached data so far? Let's fill it up */
        if (c->timestamp == USEC_INFINITY)
                goto refresh;

        /* If the data isn't pinned and if the cached data is older than the upper limit, we flush it out
         * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
        if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
                client_context_reset(s, c);
                goto refresh;
        }

        /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
        if (c->timestamp + REFRESH_USEC < timestamp)
                goto refresh;

        /* If the data passed along doesn't match the cached data we also do a refresh */
        if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
                goto refresh;

        if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
                goto refresh;

        if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
                goto refresh;

        return;

refresh:
        client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
}
579 | ||
/* Evict unpinned cache entries so that at most 'limit' remain. Pinned entries (n_ref > 0) are
 * never evicted here, hence the limit may still be exceeded if everything is pinned. Also
 * opportunistically (at most every MAX_USEC) drops entries whose PIDs have already exited. */
static void client_context_try_shrink_to(Server *s, size_t limit) {
        ClientContext *c;
        usec_t t;

        assert(s);

        /* Flush any cache entries for PIDs that have already moved on. Don't do this
         * too often, since it's a slow process. */
        t = now(CLOCK_MONOTONIC);
        if (s->last_cache_pid_flush + MAX_USEC < t) {
                unsigned n = prioq_size(s->client_contexts_lru), idx = 0;

                /* We do a number of iterations based on the initial size of the prioq. When we remove an
                 * item, a new item is moved into its places, and items to the right might be reshuffled.
                 * Hence 'idx' only advances when the current entry is kept; removals pull the next
                 * candidate into the same position.
                 */
                for (unsigned i = 0; i < n; i++) {
                        c = prioq_peek_by_index(s->client_contexts_lru, idx);

                        /* Entries in the LRU queue are by definition unpinned */
                        assert(c->n_ref == 0);

                        if (!pid_is_unwaited(c->pid))
                                client_context_free(s, c);
                        else
                                idx ++;
                }

                s->last_cache_pid_flush = t;
        }

        /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
         * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
         * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */

        while (hashmap_size(s->client_contexts) > limit) {
                c = prioq_pop(s->client_contexts_lru);
                if (!c)
                        break; /* All remaining entries are pinned, give up */

                assert(c->in_lru);
                assert(c->n_ref == 0);

                c->in_lru = false;

                client_context_free(s, c);
        }
}
626 | ||
/* Drop every remaining cache entry and free the containers. This assumes all outside references
 * have been dropped already; the two entries we pin ourselves are released here first. */
void client_context_flush_all(Server *s) {
        assert(s);

        /* Flush out all remaining entries. This assumes all references are already dropped. */

        s->my_context = client_context_release(s, s->my_context);
        s->pid1_context = client_context_release(s, s->pid1_context);

        client_context_try_shrink_to(s, 0);

        /* Shrinking to zero must have emptied both containers, since nothing is pinned anymore */
        assert(prioq_size(s->client_contexts_lru) == 0);
        assert(hashmap_size(s->client_contexts) == 0);

        s->client_contexts_lru = prioq_free(s->client_contexts_lru);
        s->client_contexts = hashmap_free(s->client_contexts);
}
643 | ||
/* Look up (or create) the cache entry for 'pid', refreshing its metadata as needed. If
 * 'add_ref' is true the entry is pinned: taken off the LRU queue (if it was there) and its
 * refcount bumped; otherwise it remains on the LRU queue, subject to eviction. On success
 * *ret points to the entry, which stays owned by the cache. */
static int client_context_get_internal(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                bool add_ref,
                ClientContext **ret) {

        ClientContext *c;
        int r;

        assert(s);
        assert(ret);

        if (!pid_is_valid(pid))
                return -EINVAL;

        c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
        if (c) {

                if (add_ref) {
                        if (c->in_lru) {
                                /* The entry wasn't pinned so far, let's remove it from the LRU list then */
                                assert(c->n_ref == 0);
                                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
                                c->in_lru = false;
                        }

                        c->n_ref++;
                }

                client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

                *ret = c;
                return 0;
        }

        /* Cache miss: make room so the new entry fits within the cache limit */
        client_context_try_shrink_to(s, cache_max()-1);

        r = client_context_new(s, pid, &c);
        if (r < 0)
                return r;

        if (add_ref)
                c->n_ref++;
        else {
                /* Unpinned entries must be registered in the LRU queue right away */
                r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
                if (r < 0) {
                        client_context_free(s, c);
                        return r;
                }

                c->in_lru = true;
        }

        client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

        *ret = c;
        return 0;
}
705 | ||
/* Unpinned lookup: fetch (or create) the metadata context for 'pid' without taking a reference;
 * the entry remains subject to LRU eviction. */
int client_context_get(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                ClientContext **ret) {

        return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
}
716 | ||
717 | int client_context_acquire( | |
718 | Server *s, | |
719 | pid_t pid, | |
720 | const struct ucred *ucred, | |
721 | const char *label, size_t label_len, | |
722 | const char *unit_id, | |
723 | ClientContext **ret) { | |
724 | ||
725 | return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret); | |
726 | }; | |
727 | ||
728 | ClientContext *client_context_release(Server *s, ClientContext *c) { | |
729 | assert(s); | |
730 | ||
731 | if (!c) | |
732 | return NULL; | |
733 | ||
734 | assert(c->n_ref > 0); | |
735 | assert(!c->in_lru); | |
736 | ||
737 | c->n_ref--; | |
738 | if (c->n_ref > 0) | |
739 | return NULL; | |
740 | ||
741 | /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it | |
742 | * right-away */ | |
743 | ||
744 | if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0) | |
745 | client_context_free(s, c); | |
746 | else | |
747 | c->in_lru = true; | |
748 | ||
749 | return NULL; | |
750 | } | |
751 | ||
752 | void client_context_acquire_default(Server *s) { | |
753 | int r; | |
754 | ||
755 | assert(s); | |
756 | ||
757 | /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to | |
758 | * generate driver messages. */ | |
759 | ||
760 | if (!s->my_context) { | |
761 | struct ucred ucred = { | |
762 | .pid = getpid_cached(), | |
763 | .uid = getuid(), | |
764 | .gid = getgid(), | |
765 | }; | |
766 | ||
767 | r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context); | |
768 | if (r < 0) | |
769 | log_warning_errno(r, "Failed to acquire our own context, ignoring: %m"); | |
770 | } | |
771 | ||
772 | if (!s->pid1_context) { | |
773 | ||
774 | r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context); | |
775 | if (r < 0) | |
776 | log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m"); | |
777 | ||
778 | } | |
779 | } |