git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/journal/journald-server.c
core: add "invocation ID" concept to service manager
[thirdparty/systemd.git] / src / journal / journald-server.c
index 5e120fdac08cec659ed6287051a5f6b35f31c22f..f01cf1d93701562897f98b3e0d0a9db3e468e55d 100644 (file)
@@ -44,6 +44,7 @@
 #include "fs-util.h"
 #include "hashmap.h"
 #include "hostname-util.h"
+#include "id128-util.h"
 #include "io-util.h"
 #include "journal-authenticate.h"
 #include "journal-file.h"
@@ -56,6 +57,7 @@
 #include "journald-server.h"
 #include "journald-stream.h"
 #include "journald-syslog.h"
+#include "log.h"
 #include "missing.h"
 #include "mkdir.h"
 #include "parse-util.h"
@@ -69,7 +71,6 @@
 #include "string-table.h"
 #include "string-util.h"
 #include "user-util.h"
-#include "log.h"
 
 #define USER_JOURNALS_MAX 1024
 
@@ -251,9 +252,9 @@ static int open_journal(
         assert(ret);
 
         if (reliably)
-                r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, NULL, &f);
+                r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
         else
-                r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, NULL, &f);
+                r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
         if (r < 0)
                 return r;
 
@@ -267,6 +268,96 @@ static int open_journal(
         return r;
 }
 
+static bool flushed_flag_is_set(void) {
+        return (access("/run/systemd/journal/flushed", F_OK) >= 0); /* true iff the flag file exists */
+}
+
+static int system_journal_open(Server *s, bool flush_requested) {
+        bool flushed = false; /* set only when the flag file, not flush_requested, let us in (short-circuit below) */
+        const char *fn;
+        int r = 0;
+
+        if (!s->system_journal &&
+            (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
+            (flush_requested || (flushed = flushed_flag_is_set()))) {
+
+                /* If in auto mode: first try to create the machine
+                 * path, but not the prefix.
+                 *
+                 * If in persistent mode: create /var/log/journal and
+                 * the machine path */
+
+                if (s->storage == STORAGE_PERSISTENT)
+                        (void) mkdir_p("/var/log/journal/", 0755);
+
+                fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
+                (void) mkdir(fn, 0755);
+
+                fn = strjoina(fn, "/system.journal");
+                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
+                if (r >= 0) {
+                        server_add_acls(s->system_journal, 0);
+                        (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
+                } else if (r < 0) {
+                        if (r != -ENOENT && r != -EROFS)
+                                log_warning_errno(r, "Failed to open system journal: %m");
+
+                        r = 0; /* failing to open the system journal is not fatal */
+                }
+
+                /* If the runtime journal is open, and we're post-flush, we're
+                 * recovering from a failed system journal rotate (ENOSPC)
+                 * for which the runtime journal was reopened.
+                 *
+                 * Perform an implicit flush to var, leaving the runtime
+                 * journal closed, now that the system journal is back.
+                 */
+                if (s->runtime_journal && flushed)
+                        (void) server_flush_to_var(s);
+        }
+
+        if (!s->runtime_journal &&
+            (s->storage != STORAGE_NONE)) {
+
+                fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
+
+                if (s->system_journal) {
+
+                        /* Try to open the runtime journal, but only
+                         * if it already exists, so that we can flush
+                         * it into the system journal */
+
+                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
+                        if (r < 0) {
+                                if (r != -ENOENT)
+                                        log_warning_errno(r, "Failed to open runtime journal: %m");
+
+                                r = 0; /* best effort only: the system journal is already open */
+                        }
+
+                } else {
+
+                        /* OK, we really need the runtime journal, so create
+                         * it if necessary. */
+
+                        (void) mkdir("/run/log", 0755);
+                        (void) mkdir("/run/log/journal", 0755);
+                        (void) mkdir_parents(fn, 0750);
+
+                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
+                        if (r < 0) /* the only path that returns an error: neither journal could be opened */
+                                return log_error_errno(r, "Failed to open runtime journal: %m");
+                }
+
+                if (s->runtime_journal) {
+                        server_add_acls(s->runtime_journal, 0);
+                        (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
+                }
+        }
+
+        return r;
+}
+
 static JournalFile* find_journal(Server *s, uid_t uid) {
         _cleanup_free_ char *p = NULL;
         int r;
@@ -275,6 +366,17 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
 
         assert(s);
 
+        /* A rotate that fails to create the new journal (ENOSPC) leaves the
+         * rotated journal as NULL.  Unless we revisit opening, even after
+         * space is made available we'll continue to return NULL indefinitely.
+         *
+         * system_journal_open() is a noop if the journals are already open, so
+         * we can just call it here to recover from failed rotates (or anything
+         * else that's left the journals as NULL).
+         *
+         * Fixes https://github.com/systemd/systemd/issues/3968 */
+        (void) system_journal_open(s, false);
+
         /* We split up user logs only on /var, not on /run. If the
          * runtime file is open, we write to it exclusively, in order
          * to guarantee proper order as soon as we flush /run to
@@ -283,7 +385,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
         if (s->runtime_journal)
                 return s->runtime_journal;
 
-        if (uid <= SYSTEM_UID_MAX)
+        if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
                 return s->system_journal;
 
         r = sd_id128_get_machine(&machine);
@@ -333,7 +435,7 @@ static int do_rotate(
         if (!*f)
                 return -EINVAL;
 
-        r = journal_file_rotate(f, s->compress, seal);
+        r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
         if (r < 0)
                 if (*f)
                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
@@ -364,6 +466,13 @@ void server_rotate(Server *s) {
                         /* Old file has been closed and deallocated */
                         ordered_hashmap_remove(s->user_journals, k);
         }
+
+        /* Perform any deferred closes which aren't still offlining. */
+        SET_FOREACH(f, s->deferred_closes, i)
+                if (!journal_file_is_offlining(f)) {
+                        (void) set_remove(s->deferred_closes, f);
+                        (void) journal_file_close(f);
+                }
 }
 
 void server_sync(Server *s) {
@@ -372,13 +481,13 @@ void server_sync(Server *s) {
         int r;
 
         if (s->system_journal) {
-                r = journal_file_set_offline(s->system_journal);
+                r = journal_file_set_offline(s->system_journal, false);
                 if (r < 0)
                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
         }
 
         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
-                r = journal_file_set_offline(f);
+                r = journal_file_set_offline(f, false);
                 if (r < 0)
                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
         }
@@ -485,38 +594,36 @@ static void server_cache_hostname(Server *s) {
 }
 
 static bool shall_try_append_again(JournalFile *f, int r) {
-
-        /* -E2BIG            Hit configured limit
-           -EFBIG            Hit fs limit
-           -EDQUOT           Quota limit hit
-           -ENOSPC           Disk full
-           -EIO              I/O error of some kind (mmap)
-           -EHOSTDOWN        Other machine
-           -EBUSY            Unclean shutdown
-           -EPROTONOSUPPORT  Unsupported feature
-           -EBADMSG          Corrupted
-           -ENODATA          Truncated
-           -ESHUTDOWN        Already archived
-           -EIDRM            Journal file has been deleted */
-
-        if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
+        switch(r) {
+        case -E2BIG:           /* Hit configured limit          */
+        case -EFBIG:           /* Hit fs limit                  */
+        case -EDQUOT:          /* Quota limit hit               */
+        case -ENOSPC:          /* Disk full                     */
                 log_debug("%s: Allocation limit reached, rotating.", f->path);
-        else if (r == -EHOSTDOWN)
+                return true;
+        case -EIO:             /* I/O error of some kind (mmap) */
+                log_warning("%s: IO error, rotating.", f->path);
+                return true;
+        case -EHOSTDOWN:       /* Other machine                 */
                 log_info("%s: Journal file from other machine, rotating.", f->path);
-        else if (r == -EBUSY)
+                return true;
+        case -EBUSY:           /* Unclean shutdown              */
                 log_info("%s: Unclean shutdown, rotating.", f->path);
-        else if (r == -EPROTONOSUPPORT)
+                return true;
+        case -EPROTONOSUPPORT: /* Unsupported feature           */
                 log_info("%s: Unsupported feature, rotating.", f->path);
-        else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
+                return true;
+        case -EBADMSG:         /* Corrupted                     */
+        case -ENODATA:         /* Truncated                     */
+        case -ESHUTDOWN:       /* Already archived              */
                 log_warning("%s: Journal file corrupted, rotating.", f->path);
-        else if (r == -EIO)
-                log_warning("%s: IO error, rotating.", f->path);
-        else if (r == -EIDRM)
+                return true;
+        case -EIDRM:           /* Journal file has been deleted */
                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
-        else
+                return true;
+        default:
                 return false;
-
-        return true;
+        }
 }
 
 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
@@ -569,6 +676,44 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                 server_schedule_sync(s, priority);
 }
 
+static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
+        _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
+        char *copy, ids[SD_ID128_STRING_MAX];
+        int r;
+
+        /* Read the invocation ID of a unit off its cgroup. It's stored in the "trusted.invocation_id" extended attribute
+         * on the cgroup path. On success, *ret is set to a newly allocated copy that the caller must free. */
+
+        r = cg_slice_to_path(slice, &slice_path);
+        if (r < 0)
+                return r;
+
+        escaped = cg_escape(unit);
+        if (!escaped)
+                return -ENOMEM;
+
+        p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
+        if (!p)
+                return -ENOMEM;
+
+        r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
+        if (r < 0)
+                return r;
+        if (r != 32) /* anything but exactly 32 characters cannot be a valid ID */
+                return -EINVAL;
+        ids[32] = 0; /* terminate the string before validating it */
+
+        if (!id128_is_valid(ids))
+                return -EINVAL;
+
+        copy = strdup(ids);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
 static void dispatch_message_real(
                 Server *s,
                 struct iovec *iovec, unsigned n, unsigned m,
@@ -607,7 +752,7 @@ static void dispatch_message_real(
         assert(s);
         assert(iovec);
         assert(n > 0);
-        assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
+        assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
 
         if (ucred) {
                 realuid = ucred->uid;
@@ -665,6 +810,7 @@ static void dispatch_message_real(
 
                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
                 if (r >= 0) {
+                        _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
                         char *session = NULL;
 
                         x = strjoina("_SYSTEMD_CGROUP=", c);
@@ -684,9 +830,8 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
                         }
 
-                        if (cg_path_get_unit(c, &t) >= 0) {
-                                x = strjoina("_SYSTEMD_UNIT=", t);
-                                free(t);
+                        if (cg_path_get_unit(c, &raw_unit) >= 0) {
+                                x = strjoina("_SYSTEMD_UNIT=", raw_unit);
                                 IOVEC_SET_STRING(iovec[n++], x);
                         } else if (unit_id && !session) {
                                 x = strjoina("_SYSTEMD_UNIT=", unit_id);
@@ -702,12 +847,25 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
-                        if (cg_path_get_slice(c, &t) >= 0) {
-                                x = strjoina("_SYSTEMD_SLICE=", t);
+                        if (cg_path_get_slice(c, &raw_slice) >= 0) {
+                                x = strjoina("_SYSTEMD_SLICE=", raw_slice);
+                                IOVEC_SET_STRING(iovec[n++], x);
+                        }
+
+                        if (cg_path_get_user_slice(c, &t) >= 0) {
+                                x = strjoina("_SYSTEMD_USER_SLICE=", t);
                                 free(t);
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
+                        if (raw_slice && raw_unit) {
+                                if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
+                                        x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
+                                        free(t);
+                                        IOVEC_SET_STRING(iovec[n++], x);
+                                }
+                        }
+
                         free(c);
                 } else if (unit_id) {
                         x = strjoina("_SYSTEMD_UNIT=", unit_id);
@@ -722,7 +880,7 @@ static void dispatch_message_real(
                                 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
                                 IOVEC_SET_STRING(iovec[n++], x);
                         } else {
-                                security_context_t con;
+                                char *con;
 
                                 if (getpidcon(ucred->pid, &con) >= 0) {
                                         x = strjoina("_SELINUX_CONTEXT=", con);
@@ -813,13 +971,25 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
+                        if (cg_path_get_slice(c, &t) >= 0) {
+                                x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
+                                free(t);
+                                IOVEC_SET_STRING(iovec[n++], x);
+                        }
+
+                        if (cg_path_get_user_slice(c, &t) >= 0) {
+                                x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
+                                free(t);
+                                IOVEC_SET_STRING(iovec[n++], x);
+                        }
+
                         free(c);
                 }
         }
         assert(n <= m);
 
         if (tv) {
-                sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
+                sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
                 IOVEC_SET_STRING(iovec[n++], source_time);
         }
 
@@ -872,7 +1042,7 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format,
         assert_cc(6 == LOG_INFO);
         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
 
-        if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
+        if (!sd_id128_is_null(message_id)) {
                 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
                 IOVEC_SET_STRING(iovec[n++], mid);
         }
@@ -974,83 +1144,6 @@ finish:
         dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
 }
 
-
-static int system_journal_open(Server *s, bool flush_requested) {
-        const char *fn;
-        int r = 0;
-
-        if (!s->system_journal &&
-            (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
-            (flush_requested
-             || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
-
-                /* If in auto mode: first try to create the machine
-                 * path, but not the prefix.
-                 *
-                 * If in persistent mode: create /var/log/journal and
-                 * the machine path */
-
-                if (s->storage == STORAGE_PERSISTENT)
-                        (void) mkdir_p("/var/log/journal/", 0755);
-
-                fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
-                (void) mkdir(fn, 0755);
-
-                fn = strjoina(fn, "/system.journal");
-                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
-                if (r >= 0) {
-                        server_add_acls(s->system_journal, 0);
-                        (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
-                } else if (r < 0) {
-                        if (r != -ENOENT && r != -EROFS)
-                                log_warning_errno(r, "Failed to open system journal: %m");
-
-                        r = 0;
-                }
-        }
-
-        if (!s->runtime_journal &&
-            (s->storage != STORAGE_NONE)) {
-
-                fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
-
-                if (s->system_journal) {
-
-                        /* Try to open the runtime journal, but only
-                         * if it already exists, so that we can flush
-                         * it into the system journal */
-
-                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
-                        if (r < 0) {
-                                if (r != -ENOENT)
-                                        log_warning_errno(r, "Failed to open runtime journal: %m");
-
-                                r = 0;
-                        }
-
-                } else {
-
-                        /* OK, we really need the runtime journal, so create
-                         * it if necessary. */
-
-                        (void) mkdir("/run/log", 0755);
-                        (void) mkdir("/run/log/journal", 0755);
-                        (void) mkdir_parents(fn, 0750);
-
-                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to open runtime journal: %m");
-                }
-
-                if (s->runtime_journal) {
-                        server_add_acls(s->runtime_journal, 0);
-                        (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
-                }
-        }
-
-        return r;
-}
-
 int server_flush_to_var(Server *s) {
         sd_id128_t machine;
         sd_journal *j = NULL;
@@ -1279,7 +1372,7 @@ static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *
 
         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
 
-        server_flush_to_var(s);
+        (void) server_flush_to_var(s);
         server_sync(s);
         server_vacuum(s, false, false);
 
@@ -1400,7 +1493,7 @@ static int server_parse_proc_cmdline(Server *s) {
         }
 
         p = line;
-        for(;;) {
+        for (;;) {
                 _cleanup_free_ char *word = NULL;
 
                 r = extract_first_word(&p, &word, NULL, 0);
@@ -1445,7 +1538,7 @@ static int server_parse_proc_cmdline(Server *s) {
 static int server_parse_config_file(Server *s) {
         assert(s);
 
-        return config_parse_many(PKGSYSCONFDIR "/journald.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
                                  CONF_PATHS_NULSTR("systemd/journald.conf.d"),
                                  "Journal\0",
                                  config_item_perf_lookup, journald_gperf_lookup,
@@ -1558,7 +1651,7 @@ static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents,
         assert(s->notify_fd == fd);
 
         /* The $NOTIFY_SOCKET is writable again, now send exactly one
-         * message on it. Either it's the wtachdog event, the initial
+         * message on it. Either it's the watchdog event, the initial
          * READY=1 event or an stdout stream event. If there's nothing
          * to write anymore, turn our event source off. The next time
          * there's something to send it will be turned on again. */
@@ -1602,7 +1695,7 @@ static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents,
                 /* Dispatch one stream notification event */
                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
 
-        /* Leave us enabled if there's still more to to do. */
+        /* Leave us enabled if there's still more to do. */
         if (s->send_watchdog || s->stdout_streams_notify_queue)
                 return 0;
 
@@ -1655,7 +1748,7 @@ static int server_connect_notify(Server *s) {
           it. Specifically: given that PID 1 might block on
           dbus-daemon during IPC, and dbus-daemon is logging to us,
           and might hence block on us, we might end up in a deadlock
-          if we block on sending PID 1 notification messages -- by
+          if we block on sending PID 1 notification messages — by
           generating a full blocking circle. To avoid this, let's
           create a non-blocking socket, and connect it to the
           notification socket, and then wait for POLLOUT before we
@@ -1691,7 +1784,7 @@ static int server_connect_notify(Server *s) {
         if (sa.un.sun_path[0] == '@')
                 sa.un.sun_path[0] = 0;
 
-        r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
+        r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
         if (r < 0)
                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
 
@@ -1765,6 +1858,10 @@ int server_init(Server *s) {
         if (!s->mmap)
                 return log_oom();
 
+        s->deferred_closes = set_new(NULL);
+        if (!s->deferred_closes)
+                return log_oom();
+
         r = sd_event_default(&s->event);
         if (r < 0)
                 return log_error_errno(r, "Failed to create event loop: %m");
@@ -1918,6 +2015,11 @@ void server_done(Server *s) {
         JournalFile *f;
         assert(s);
 
+        if (s->deferred_closes) {
+                journal_file_close_set(s->deferred_closes);
+                set_free(s->deferred_closes);
+        }
+
         while (s->stdout_streams)
                 stdout_stream_free(s->stdout_streams);