/* SPDX-License-Identifier: LGPL-2.1+ */

#if HAVE_SELINUX
#include <selinux/selinux.h>
#endif

#include "alloc-util.h"
#include "audit-util.h"
#include "cgroup-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "io-util.h"
#include "journal-util.h"
#include "journald-context.h"
#include "parse-util.h"
#include "process-util.h"
#include "string-util.h"
#include "syslog-util.h"
#include "unaligned.h"
#include "user-util.h"

/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
 * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
 * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time than
 * when the log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
 *
 * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
 * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
 * are never used (a sad attempt to deal with the UNIX weakness of PID reuse), cache entries older than 1s are
 * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
 * flushed out). Data newer than 1s is used immediately without refresh.
 *
 * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
 * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
 * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
 *
 * Caching metadata like this has two major benefits:
 *
 * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
 *
 * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
 *    entry, there's a good chance we can properly map a substantial set of datagram log messages to their originating
 *    service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
 *    stream connection. This should improve cases where a service process logs immediately before exiting and we
 *    previously had trouble associating the log message with the service.
 *
 * NB: With or without the metadata cache, the implicitly added entry metadata in the journal (with the exception of
 * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older
 * and sometimes slightly newer than what was current at the log event).
 */
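
/* Rough usage sketch (illustrative only; the real call sites live in journald's transport code, not in this file):
 *
 *     ClientContext *context = NULL;
 *
 *     // datagram transports (syslog/native): look up an unpinned, LRU-managed entry
 *     r = client_context_get(s, pid, ucred, label, label_len, NULL, &context);
 *
 *     // stream transport: pin the entry for the lifetime of the connection ...
 *     r = client_context_acquire(s, pid, ucred, label, label_len, NULL, &context);
 *     // ... and drop the pin again when the connection goes away
 *     context = client_context_release(s, context);
 */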

/* We refresh every 1s */
#define REFRESH_USEC (1*USEC_PER_SEC)

/* Data older than 5s we flush out */
#define MAX_USEC (5*USEC_PER_SEC)

/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
 * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
 * clients itself is limited.) */
#define CACHE_MAX (16*1024)

static int client_context_compare(const void *a, const void *b) {
        const ClientContext *x = a, *y = b;
        int r;

        r = CMP(x->timestamp, y->timestamp);
        if (r != 0)
                return r;

        return CMP(x->pid, y->pid);
}

static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
        ClientContext *c;
        int r;

        assert(s);
        assert(pid_is_valid(pid));
        assert(ret);

        r = hashmap_ensure_allocated(&s->client_contexts, NULL);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
        if (r < 0)
                return r;

        c = new0(ClientContext, 1);
        if (!c)
                return -ENOMEM;

        c->pid = pid;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;
        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;
        c->owner_uid = UID_INVALID;
        c->lru_index = PRIOQ_IDX_NULL;
        c->timestamp = USEC_INFINITY;
        c->extra_fields_mtime = NSEC_INFINITY;
        c->log_level_max = -1;
        c->log_rate_limit_interval = s->rate_limit_interval;
        c->log_rate_limit_burst = s->rate_limit_burst;

        r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c);
        if (r < 0) {
                free(c);
                return r;
        }

        *ret = c;
        return 0;
}
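
/* Drops all cached metadata from an entry, returning it to the pristine state client_context_new() leaves it in,
 * without removing it from the hashmap or the LRU queue. */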
static void client_context_reset(Server *s, ClientContext *c) {
        assert(s);
        assert(c);

        c->timestamp = USEC_INFINITY;

        c->uid = UID_INVALID;
        c->gid = GID_INVALID;

        c->comm = mfree(c->comm);
        c->exe = mfree(c->exe);
        c->cmdline = mfree(c->cmdline);
        c->capeff = mfree(c->capeff);

        c->auditid = AUDIT_SESSION_INVALID;
        c->loginuid = UID_INVALID;

        c->cgroup = mfree(c->cgroup);
        c->session = mfree(c->session);
        c->owner_uid = UID_INVALID;
        c->unit = mfree(c->unit);
        c->user_unit = mfree(c->user_unit);
        c->slice = mfree(c->slice);
        c->user_slice = mfree(c->user_slice);

        c->invocation_id = SD_ID128_NULL;

        c->label = mfree(c->label);
        c->label_size = 0;

        c->extra_fields_iovec = mfree(c->extra_fields_iovec);
        c->extra_fields_n_iovec = 0;
        c->extra_fields_data = mfree(c->extra_fields_data);
        c->extra_fields_mtime = NSEC_INFINITY;

        c->log_level_max = -1;

        c->log_rate_limit_interval = s->rate_limit_interval;
        c->log_rate_limit_burst = s->rate_limit_burst;
}

static ClientContext* client_context_free(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);

        if (c->in_lru)
                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);

        client_context_reset(s, c);

        return mfree(c);
}

static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) {
        assert(c);
        assert(pid_is_valid(c->pid));

        /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */
        if (ucred && uid_is_valid(ucred->uid))
                c->uid = ucred->uid;
        else
                (void) get_process_uid(c->pid, &c->uid);

        if (ucred && gid_is_valid(ucred->gid))
                c->gid = ucred->gid;
        else
                (void) get_process_gid(c->pid, &c->gid);
}

static void client_context_read_basic(ClientContext *c) {
        char *t;

        assert(c);
        assert(pid_is_valid(c->pid));

        if (get_process_comm(c->pid, &t) >= 0)
                free_and_replace(c->comm, t);

        if (get_process_exe(c->pid, &t) >= 0)
                free_and_replace(c->exe, t);

        if (get_process_cmdline(c->pid, 0, false, &t) >= 0)
                free_and_replace(c->cmdline, t);

        if (get_process_capeff(c->pid, &t) >= 0)
                free_and_replace(c->capeff, t);
}

static int client_context_read_label(
                ClientContext *c,
                const char *label, size_t label_size) {

        assert(c);
        assert(pid_is_valid(c->pid));
        assert(label_size == 0 || label);

        if (label_size > 0) {
                char *l;

                /* If we got an SELinux label passed in it counts. */

                l = newdup_suffix0(char, label, label_size);
                if (!l)
                        return -ENOMEM;

                free_and_replace(c->label, l);
                c->label_size = label_size;
        }
#if HAVE_SELINUX
        else {
                char *con;

                /* If we got no SELinux label passed in, let's try to acquire one */

                if (getpidcon(c->pid, &con) >= 0) {
                        free_and_replace(c->label, con);
                        c->label_size = strlen(c->label);
                }
        }
#endif

        return 0;
}

static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
        char *t = NULL;
        int r;

        assert(c);

        /* Try to acquire the current cgroup path */
        r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
        if (r < 0) {

                /* If that didn't work, we use the unit ID passed in as fallback, if we have nothing cached yet */
                if (unit_id && !c->unit) {
                        c->unit = strdup(unit_id);
                        if (c->unit)
                                return 0;
                }

                return r;
        }

        /* Let's shortcut this if the cgroup path didn't change */
        if (streq_ptr(c->cgroup, t)) {
                free(t);
                return 0;
        }

        free_and_replace(c->cgroup, t);

        (void) cg_path_get_session(c->cgroup, &t);
        free_and_replace(c->session, t);

        if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
                c->owner_uid = UID_INVALID;

        (void) cg_path_get_unit(c->cgroup, &t);
        free_and_replace(c->unit, t);

        (void) cg_path_get_user_unit(c->cgroup, &t);
        free_and_replace(c->user_unit, t);

        (void) cg_path_get_slice(c->cgroup, &t);
        free_and_replace(c->slice, t);

        (void) cg_path_get_user_slice(c->cgroup, &t);
        free_and_replace(c->user_slice, t);

        return 0;
}

static int client_context_read_invocation_id(
                Server *s,
                ClientContext *c) {

        _cleanup_free_ char *value = NULL;
        const char *p;
        int r;

        assert(s);
        assert(c);

        /* Read the invocation ID of the unit the client belongs to. PID 1 stores it in a per-unit symlink in
         * /run/systemd/units/. */

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/invocation:", c->unit);
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        return sd_id128_from_string(value, &c->invocation_id);
}
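
/* PID 1 exports a unit's LogLevelMax= setting as a /run/systemd/units/log-level-max:<unit> symlink whose target is
 * the level name; read it back here and translate it into a numeric log level. */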
static int client_context_read_log_level_max(
                Server *s,
                ClientContext *c) {

        _cleanup_free_ char *value = NULL;
        const char *p;
        int r, ll;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-level-max:", c->unit);
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        ll = log_level_from_string(value);
        if (ll < 0)
                return -EINVAL;

        c->log_level_max = ll;
        return 0;
}

static int client_context_read_extra_fields(
                Server *s,
                ClientContext *c) {

        size_t size = 0, n_iovec = 0, n_allocated = 0, left;
        _cleanup_free_ struct iovec *iovec = NULL;
        _cleanup_free_ void *data = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        struct stat st;
        const char *p;
        uint8_t *q;
        int r;

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);

        if (c->extra_fields_mtime != NSEC_INFINITY) {
                if (stat(p, &st) < 0) {
                        if (errno == ENOENT)
                                return 0;

                        return -errno;
                }

                if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
                        return 0;
        }

        f = fopen(p, "re");
        if (!f) {
                if (errno == ENOENT)
                        return 0;

                return -errno;
        }

        if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
                                        * one, that matches the stuff we are reading */
                return -errno;

        r = read_full_stream(f, (char**) &data, &size);
        if (r < 0)
                return r;

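        /* The file is a sequence of serialized journal fields: each record is a little-endian 64-bit length followed
         * by that many bytes of "FIELD=value" payload (hence the minimum size of 2 enforced below). */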
        q = data, left = size;
        while (left > 0) {
                uint8_t *field, *eq;
                uint64_t v, n;

                if (left < sizeof(uint64_t))
                        return -EBADMSG;

                v = unaligned_read_le64(q);
                if (v < 2)
                        return -EBADMSG;

                n = sizeof(uint64_t) + v;
                if (left < n)
                        return -EBADMSG;

                field = q + sizeof(uint64_t);

                eq = memchr(field, '=', v);
                if (!eq)
                        return -EBADMSG;

                if (!journal_field_valid((const char *) field, eq - field, false))
                        return -EBADMSG;

                if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
                        return -ENOMEM;

                iovec[n_iovec++] = IOVEC_MAKE(field, v);

                left -= n, q += n;
        }

        free(c->extra_fields_iovec);
        free(c->extra_fields_data);

        c->extra_fields_iovec = TAKE_PTR(iovec);
        c->extra_fields_n_iovec = n_iovec;
        c->extra_fields_data = TAKE_PTR(data);
        c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);

        return 0;
}
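
/* Analogously to the log-level-max: and invocation: symlinks above, PID 1 exports each unit's journal rate limit
 * settings (LogRateLimitIntervalSec=/LogRateLimitBurst=) as /run/systemd/units/log-rate-limit-interval:<unit> and
 * log-rate-limit-burst:<unit> symlinks; reading them here lets journald apply per-unit limits instead of the
 * server-wide defaults copied into the context by client_context_new(). */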
static int client_context_read_log_rate_limit_interval(ClientContext *c) {
        _cleanup_free_ char *value = NULL;
        const char *p;
        int r;

        assert(c);

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit);
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        return safe_atou64(value, &c->log_rate_limit_interval);
}

static int client_context_read_log_rate_limit_burst(ClientContext *c) {
        _cleanup_free_ char *value = NULL;
        const char *p;
        int r;

        assert(c);

        if (!c->unit)
                return 0;

        p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit);
        r = readlink_malloc(p, &value);
        if (r < 0)
                return r;

        return safe_atou(value, &c->log_rate_limit_burst);
}

static void client_context_really_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);
        assert(pid_is_valid(c->pid));

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        client_context_read_uid_gid(c, ucred);
        client_context_read_basic(c);
        (void) client_context_read_label(c, label, label_size);

        (void) audit_session_from_pid(c->pid, &c->auditid);
        (void) audit_loginuid_from_pid(c->pid, &c->loginuid);

        (void) client_context_read_cgroup(s, c, unit_id);
        (void) client_context_read_invocation_id(s, c);
        (void) client_context_read_log_level_max(s, c);
        (void) client_context_read_extra_fields(s, c);
        (void) client_context_read_log_rate_limit_interval(c);
        (void) client_context_read_log_rate_limit_burst(c);

        c->timestamp = timestamp;

        if (c->in_lru) {
                assert(c->n_ref == 0);
                assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
        }
}
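
/* Decides whether the cached metadata can be used as-is: data that is stale (see REFRESH_USEC/MAX_USEC above) or
 * that contradicts the credentials/label passed in is refreshed via client_context_really_refresh(). */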
void client_context_maybe_refresh(
                Server *s,
                ClientContext *c,
                const struct ucred *ucred,
                const char *label, size_t label_size,
                const char *unit_id,
                usec_t timestamp) {

        assert(s);
        assert(c);

        if (timestamp == USEC_INFINITY)
                timestamp = now(CLOCK_MONOTONIC);

        /* No cached data so far? Let's fill it up */
        if (c->timestamp == USEC_INFINITY)
                goto refresh;

        /* If the data isn't pinned and the cached data is older than the upper limit, we flush it out
         * entirely. This follows the logic that as long as an entry is pinned, PID reuse is unlikely. */
        if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
                client_context_reset(s, c);
                goto refresh;
        }

        /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
        if (c->timestamp + REFRESH_USEC < timestamp)
                goto refresh;

        /* If the data passed along doesn't match the cached data we also do a refresh */
        if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
                goto refresh;

        if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
                goto refresh;

        if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
                goto refresh;

        return;

refresh:
        client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
}

static void client_context_try_shrink_to(Server *s, size_t limit) {
        assert(s);

        /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
         * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
         * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */

        while (hashmap_size(s->client_contexts) > limit) {
                ClientContext *c;

                c = prioq_pop(s->client_contexts_lru);
                if (!c)
                        break; /* All remaining entries are pinned, give up */

                assert(c->in_lru);
                assert(c->n_ref == 0);

                c->in_lru = false;

                client_context_free(s, c);
        }
}

void client_context_flush_all(Server *s) {
        assert(s);

        /* Flush out all remaining entries. This assumes all references are already dropped. */

        s->my_context = client_context_release(s, s->my_context);
        s->pid1_context = client_context_release(s, s->pid1_context);

        client_context_try_shrink_to(s, 0);

        assert(prioq_size(s->client_contexts_lru) == 0);
        assert(hashmap_size(s->client_contexts) == 0);

        s->client_contexts_lru = prioq_free(s->client_contexts_lru);
        s->client_contexts = hashmap_free(s->client_contexts);
}

static int client_context_get_internal(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                bool add_ref,
                ClientContext **ret) {

        ClientContext *c;
        int r;

        assert(s);
        assert(ret);

        if (!pid_is_valid(pid))
                return -EINVAL;

        c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
        if (c) {

                if (add_ref) {
                        if (c->in_lru) {
                                /* The entry wasn't pinned so far, let's remove it from the LRU list then */
                                assert(c->n_ref == 0);
                                assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
                                c->in_lru = false;
                        }

                        c->n_ref++;
                }

                client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

                *ret = c;
                return 0;
        }

        client_context_try_shrink_to(s, CACHE_MAX-1);

        r = client_context_new(s, pid, &c);
        if (r < 0)
                return r;

        if (add_ref)
                c->n_ref++;
        else {
                r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
                if (r < 0) {
                        client_context_free(s, c);
                        return r;
                }

                c->in_lru = true;
        }

        client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);

        *ret = c;
        return 0;
}
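
/* client_context_get() returns an unpinned entry, subject to LRU eviction; client_context_acquire() returns a pinned
 * one and must eventually be balanced by client_context_release(). */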
int client_context_get(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                ClientContext **ret) {

        return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
}

int client_context_acquire(
                Server *s,
                pid_t pid,
                const struct ucred *ucred,
                const char *label, size_t label_len,
                const char *unit_id,
                ClientContext **ret) {

        return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret);
}

ClientContext *client_context_release(Server *s, ClientContext *c) {
        assert(s);

        if (!c)
                return NULL;

        assert(c->n_ref > 0);
        assert(!c->in_lru);

        c->n_ref--;
        if (c->n_ref > 0)
                return NULL;

        /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
         * right away. */

        if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
                client_context_free(s, c);
        else
                c->in_lru = true;

        return NULL;
}

void client_context_acquire_default(Server *s) {
        int r;

        assert(s);

        /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
         * generate driver messages. */

        if (!s->my_context) {
                struct ucred ucred = {
                        .pid = getpid_cached(),
                        .uid = getuid(),
                        .gid = getgid(),
                };

                r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
                if (r < 0)
                        log_warning_errno(r, "Failed to acquire our own context, ignoring: %m");
        }

        if (!s->pid1_context) {
                r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
                if (r < 0)
                        log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m");
        }
}