git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/journal/journald-server.c
journald: make reading /dev/kmsg optional (#6362)
[thirdparty/systemd.git] / src / journal / journald-server.c
index f01cf1d93701562897f98b3e0d0a9db3e468e55d..6eb19e88669c1e4105626e37cf1f5c519c46ad4a 100644 (file)
@@ -40,7 +40,7 @@
 #include "extract-word.h"
 #include "fd-util.h"
 #include "fileio.h"
-#include "formats-util.h"
+#include "format-util.h"
 #include "fs-util.h"
 #include "hashmap.h"
 #include "hostname-util.h"
@@ -71,6 +71,7 @@
 #include "string-table.h"
 #include "string-util.h"
 #include "user-util.h"
+#include "syslog-util.h"
 
 #define USER_JOURNALS_MAX 1024
 
 /* The period to insert between posting changes for coalescing */
 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
 
-static int determine_space_for(
-                Server *s,
-                JournalMetrics *metrics,
-                const char *path,
-                const char *name,
-                bool verbose,
-                bool patch_min_use,
-                uint64_t *available,
-                uint64_t *limit) {
-
-        uint64_t sum = 0, ss_avail, avail;
+static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
         _cleanup_closedir_ DIR *d = NULL;
         struct dirent *de;
         struct statvfs ss;
-        const char *p;
-        usec_t ts;
-
-        assert(s);
-        assert(metrics);
-        assert(path);
-        assert(name);
-
-        ts = now(CLOCK_MONOTONIC);
-
-        if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
-
-                if (available)
-                        *available = s->cached_space_available;
-                if (limit)
-                        *limit = s->cached_space_limit;
 
-                return 0;
-        }
+        assert(ret_used);
+        assert(ret_free);
 
-        p = strjoina(path, SERVER_MACHINE_ID(s));
-        d = opendir(p);
+        d = opendir(path);
         if (!d)
-                return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
+                return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
+                                      errno, "Failed to open %s: %m", path);
 
         if (fstatvfs(dirfd(d), &ss) < 0)
-                return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
+                return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
 
+        *ret_free = ss.f_bsize * ss.f_bavail;
+        *ret_used = 0;
         FOREACH_DIRENT_ALL(de, d, break) {
                 struct stat st;
 
@@ -136,88 +113,125 @@ static int determine_space_for(
                         continue;
 
                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
-                        log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
+                        log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
                         continue;
                 }
 
                 if (!S_ISREG(st.st_mode))
                         continue;
 
-                sum += (uint64_t) st.st_blocks * 512UL;
+                *ret_used += (uint64_t) st.st_blocks * 512UL;
         }
 
-        /* If requested, then let's bump the min_use limit to the
-         * current usage on disk. We do this when starting up and
-         * first opening the journal files. This way sudden spikes in
-         * disk usage will not cause journald to vacuum files without
-         * bounds. Note that this means that only a restart of
-         * journald will make it reset this value. */
-
-        if (patch_min_use)
-                metrics->min_use = MAX(metrics->min_use, sum);
-
-        ss_avail = ss.f_bsize * ss.f_bavail;
-        avail = LESS_BY(ss_avail, metrics->keep_free);
-
-        s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
-        s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
-        s->cached_space_timestamp = ts;
-
-        if (verbose) {
-                char    fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
-                        fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
-                format_bytes(fb1, sizeof(fb1), sum);
-                format_bytes(fb2, sizeof(fb2), metrics->max_use);
-                format_bytes(fb3, sizeof(fb3), metrics->keep_free);
-                format_bytes(fb4, sizeof(fb4), ss_avail);
-                format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
-                format_bytes(fb6, sizeof(fb6), s->cached_space_available);
-
-                server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
-                                      LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
-                                                  name, path, fb1, fb5, fb6),
-                                      "JOURNAL_NAME=%s", name,
-                                      "JOURNAL_PATH=%s", path,
-                                      "CURRENT_USE=%"PRIu64, sum,
-                                      "CURRENT_USE_PRETTY=%s", fb1,
-                                      "MAX_USE=%"PRIu64, metrics->max_use,
-                                      "MAX_USE_PRETTY=%s", fb2,
-                                      "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
-                                      "DISK_KEEP_FREE_PRETTY=%s", fb3,
-                                      "DISK_AVAILABLE=%"PRIu64, ss_avail,
-                                      "DISK_AVAILABLE_PRETTY=%s", fb4,
-                                      "LIMIT=%"PRIu64, s->cached_space_limit,
-                                      "LIMIT_PRETTY=%s", fb5,
-                                      "AVAILABLE=%"PRIu64, s->cached_space_available,
-                                      "AVAILABLE_PRETTY=%s", fb6,
-                                      NULL);
-        }
+        return 0;
+}
+
+static void cache_space_invalidate(JournalStorageSpace *space) {
+        memset(space, 0, sizeof(*space));
+}
+
+static int cache_space_refresh(Server *s, JournalStorage *storage) {
+        JournalStorageSpace *space;
+        JournalMetrics *metrics;
+        uint64_t vfs_used, vfs_avail, avail;
+        usec_t ts;
+        int r;
+
+        assert(s);
+
+        metrics = &storage->metrics;
+        space = &storage->space;
+
+        ts = now(CLOCK_MONOTONIC);
+
+        if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
+                return 0;
 
-        if (available)
-                *available = s->cached_space_available;
-        if (limit)
-                *limit = s->cached_space_limit;
+        r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
+        if (r < 0)
+                return r;
+
+        space->vfs_used = vfs_used;
+        space->vfs_available = vfs_avail;
 
+        avail = LESS_BY(vfs_avail, metrics->keep_free);
+
+        space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
+        space->available = LESS_BY(space->limit, vfs_used);
+        space->timestamp = ts;
         return 1;
 }
 
-static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
-        JournalMetrics *metrics;
-        const char *path, *name;
+static void patch_min_use(JournalStorage *storage) {
+        assert(storage);
+
+        /* Let's bump the min_use limit to the current usage on disk. We do
+         * this when starting up and first opening the journal files. This way
+         * sudden spikes in disk usage will not cause journald to vacuum files
+         * without bounds. Note that this means that only a restart of journald
+         * will make it reset this value. */
+
+        storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
+}
+
+
+static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
+        JournalStorage *js;
+        int r;
 
         assert(s);
 
-        if (s->system_journal) {
-                path = "/var/log/journal/";
-                metrics = &s->system_metrics;
-                name = "System journal";
-        } else {
-                path = "/run/log/journal/";
-                metrics = &s->runtime_metrics;
-                name = "Runtime journal";
+        js = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+        r = cache_space_refresh(s, js);
+        if (r >= 0) {
+                if (available)
+                        *available = js->space.available;
+                if (limit)
+                        *limit = js->space.limit;
         }
+        return r;
+}
+
+void server_space_usage_message(Server *s, JournalStorage *storage) {
+        char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
+             fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
+        JournalMetrics *metrics;
+
+        assert(s);
+
+        if (!storage)
+                storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+        if (cache_space_refresh(s, storage) < 0)
+                return;
 
-        return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
+        metrics = &storage->metrics;
+        format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
+        format_bytes(fb2, sizeof(fb2), metrics->max_use);
+        format_bytes(fb3, sizeof(fb3), metrics->keep_free);
+        format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
+        format_bytes(fb5, sizeof(fb5), storage->space.limit);
+        format_bytes(fb6, sizeof(fb6), storage->space.available);
+
+        server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
+                              LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
+                                          storage->name, storage->path, fb1, fb5, fb6),
+                              "JOURNAL_NAME=%s", storage->name,
+                              "JOURNAL_PATH=%s", storage->path,
+                              "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
+                              "CURRENT_USE_PRETTY=%s", fb1,
+                              "MAX_USE=%"PRIu64, metrics->max_use,
+                              "MAX_USE_PRETTY=%s", fb2,
+                              "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
+                              "DISK_KEEP_FREE_PRETTY=%s", fb3,
+                              "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
+                              "DISK_AVAILABLE_PRETTY=%s", fb4,
+                              "LIMIT=%"PRIu64, storage->space.limit,
+                              "LIMIT_PRETTY=%s", fb5,
+                              "AVAILABLE=%"PRIu64, storage->space.available,
+                              "AVAILABLE_PRETTY=%s", fb6,
+                              NULL);
 }
 
 static void server_add_acls(JournalFile *f, uid_t uid) {
@@ -269,17 +283,16 @@ static int open_journal(
 }
 
 static bool flushed_flag_is_set(void) {
-        return (access("/run/systemd/journal/flushed", F_OK) >= 0);
+        return access("/run/systemd/journal/flushed", F_OK) >= 0;
 }
 
 static int system_journal_open(Server *s, bool flush_requested) {
-        bool flushed = false;
         const char *fn;
         int r = 0;
 
         if (!s->system_journal &&
-            (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
-            (flush_requested || (flushed = flushed_flag_is_set()))) {
+            IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
+            (flush_requested || flushed_flag_is_set())) {
 
                 /* If in auto mode: first try to create the machine
                  * path, but not the prefix.
@@ -290,14 +303,14 @@ static int system_journal_open(Server *s, bool flush_requested) {
                 if (s->storage == STORAGE_PERSISTENT)
                         (void) mkdir_p("/var/log/journal/", 0755);
 
-                fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
-                (void) mkdir(fn, 0755);
+                (void) mkdir(s->system_storage.path, 0755);
 
-                fn = strjoina(fn, "/system.journal");
-                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal);
+                fn = strjoina(s->system_storage.path, "/system.journal");
+                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
                 if (r >= 0) {
                         server_add_acls(s->system_journal, 0);
-                        (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
+                        (void) cache_space_refresh(s, &s->system_storage);
+                        patch_min_use(&s->system_storage);
                 } else if (r < 0) {
                         if (r != -ENOENT && r != -EROFS)
                                 log_warning_errno(r, "Failed to open system journal: %m");
@@ -312,14 +325,14 @@ static int system_journal_open(Server *s, bool flush_requested) {
                  * Perform an implicit flush to var, leaving the runtime
                  * journal closed, now that the system journal is back.
                  */
-                if (s->runtime_journal && flushed)
-                        (void) server_flush_to_var(s);
+                if (!flush_requested)
+                        (void) server_flush_to_var(s, true);
         }
 
         if (!s->runtime_journal &&
             (s->storage != STORAGE_NONE)) {
 
-                fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
+                fn = strjoina(s->runtime_storage.path, "/system.journal");
 
                 if (s->system_journal) {
 
@@ -327,7 +340,7 @@ static int system_journal_open(Server *s, bool flush_requested) {
                          * if it already exists, so that we can flush
                          * it into the system journal */
 
-                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal);
+                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
                         if (r < 0) {
                                 if (r != -ENOENT)
                                         log_warning_errno(r, "Failed to open runtime journal: %m");
@@ -344,14 +357,15 @@ static int system_journal_open(Server *s, bool flush_requested) {
                         (void) mkdir("/run/log/journal", 0755);
                         (void) mkdir_parents(fn, 0750);
 
-                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal);
+                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to open runtime journal: %m");
                 }
 
                 if (s->runtime_journal) {
                         server_add_acls(s->runtime_journal, 0);
-                        (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
+                        (void) cache_space_refresh(s, &s->runtime_storage);
+                        patch_min_use(&s->runtime_storage);
                 }
         }
 
@@ -407,7 +421,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
                 (void) journal_file_close(f);
         }
 
-        r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f);
+        r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
         if (r < 0)
                 return s->system_journal;
 
@@ -501,50 +515,38 @@ void server_sync(Server *s) {
         s->sync_scheduled = false;
 }
 
-static void do_vacuum(
-                Server *s,
-                JournalFile *f,
-                JournalMetrics *metrics,
-                const char *path,
-                const char *name,
-                bool verbose,
-                bool patch_min_use) {
+static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 
-        const char *p;
-        uint64_t limit;
         int r;
 
         assert(s);
-        assert(metrics);
-        assert(path);
-        assert(name);
+        assert(storage);
 
-        if (!f)
-                return;
-
-        p = strjoina(path, SERVER_MACHINE_ID(s));
+        (void) cache_space_refresh(s, storage);
 
-        limit = metrics->max_use;
-        (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
+        if (verbose)
+                server_space_usage_message(s, storage);
 
-        r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec,  verbose);
+        r = journal_directory_vacuum(storage->path, storage->space.limit,
+                                     storage->metrics.n_max_files, s->max_retention_usec,
+                                     &s->oldest_file_usec, verbose);
         if (r < 0 && r != -ENOENT)
-                log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
+                log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
+
+        cache_space_invalidate(&storage->space);
 }
 
-int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
+int server_vacuum(Server *s, bool verbose) {
         assert(s);
 
         log_debug("Vacuuming...");
 
         s->oldest_file_usec = 0;
 
-        do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
-        do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
-
-        s->cached_space_limit = 0;
-        s->cached_space_available = 0;
-        s->cached_space_timestamp = 0;
+        if (s->system_journal)
+                do_vacuum(s, &s->system_storage, verbose);
+        if (s->runtime_journal)
+                do_vacuum(s, &s->runtime_storage, verbose);
 
         return 0;
 }
@@ -595,54 +597,88 @@ static void server_cache_hostname(Server *s) {
 
 static bool shall_try_append_again(JournalFile *f, int r) {
         switch(r) {
+
         case -E2BIG:           /* Hit configured limit          */
         case -EFBIG:           /* Hit fs limit                  */
         case -EDQUOT:          /* Quota limit hit               */
         case -ENOSPC:          /* Disk full                     */
                 log_debug("%s: Allocation limit reached, rotating.", f->path);
                 return true;
+
         case -EIO:             /* I/O error of some kind (mmap) */
                 log_warning("%s: IO error, rotating.", f->path);
                 return true;
+
         case -EHOSTDOWN:       /* Other machine                 */
                 log_info("%s: Journal file from other machine, rotating.", f->path);
                 return true;
+
         case -EBUSY:           /* Unclean shutdown              */
                 log_info("%s: Unclean shutdown, rotating.", f->path);
                 return true;
+
         case -EPROTONOSUPPORT: /* Unsupported feature           */
                 log_info("%s: Unsupported feature, rotating.", f->path);
                 return true;
+
         case -EBADMSG:         /* Corrupted                     */
         case -ENODATA:         /* Truncated                     */
         case -ESHUTDOWN:       /* Already archived              */
                 log_warning("%s: Journal file corrupted, rotating.", f->path);
                 return true;
+
         case -EIDRM:           /* Journal file has been deleted */
                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
                 return true;
+
+        case -ETXTBSY:         /* Journal file is from the future */
+                log_warning("%s: Journal file is from the future, rotating.", f->path);
+                return true;
+
         default:
                 return false;
         }
 }
 
 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
+        bool vacuumed = false, rotate = false;
+        struct dual_timestamp ts;
         JournalFile *f;
-        bool vacuumed = false;
         int r;
 
         assert(s);
         assert(iovec);
         assert(n > 0);
 
-        f = find_journal(s, uid);
-        if (!f)
-                return;
+        /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
+         * the source time, and not even the time the event was originally seen, but instead simply the time we started
+         * processing it, as we want strictly linear ordering in what we write out.) */
+        assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
+        assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 
-        if (journal_file_rotate_suggested(f, s->max_file_usec)) {
-                log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+        if (ts.realtime < s->last_realtime_clock) {
+                /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
+                 * regular operation. However, when it does happen, then we should make sure that we start fresh files
+                 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
+                 * bisection works correctly. */
+
+                log_debug("Time jumped backwards, rotating.");
+                rotate = true;
+        } else {
+
+                f = find_journal(s, uid);
+                if (!f)
+                        return;
+
+                if (journal_file_rotate_suggested(f, s->max_file_usec)) {
+                        log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+                        rotate = true;
+                }
+        }
+
+        if (rotate) {
                 server_rotate(s);
-                server_vacuum(s, false, false);
+                server_vacuum(s, false);
                 vacuumed = true;
 
                 f = find_journal(s, uid);
@@ -650,7 +686,9 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                         return;
         }
 
-        r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
+        s->last_realtime_clock = ts.realtime;
+
+        r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
         if (r >= 0) {
                 server_schedule_sync(s, priority);
                 return;
@@ -662,14 +700,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
         }
 
         server_rotate(s);
-        server_vacuum(s, false, false);
+        server_vacuum(s, false);
 
         f = find_journal(s, uid);
         if (!f)
                 return;
 
         log_debug("Retrying write.");
-        r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
+        r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
         if (r < 0)
                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
         else
@@ -692,7 +730,7 @@ static int get_invocation_id(const char *cgroup_root, const char *slice, const c
         if (!escaped)
                 return -ENOMEM;
 
-        p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
+        p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
         if (!p)
                 return -ENOMEM;
 
@@ -722,7 +760,8 @@ static void dispatch_message_real(
                 const char *label, size_t label_len,
                 const char *unit_id,
                 int priority,
-                pid_t object_pid) {
+                pid_t object_pid,
+                char *cgroup) {
 
         char    pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
                 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
@@ -808,7 +847,12 @@ static void dispatch_message_real(
                 }
 #endif
 
-                r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
+                r = 0;
+                if (cgroup)
+                        c = cgroup;
+                else
+                        r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
+
                 if (r >= 0) {
                         _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
                         char *session = NULL;
@@ -866,14 +910,15 @@ static void dispatch_message_real(
                                 }
                         }
 
-                        free(c);
+                        if (!cgroup)
+                                free(c);
                 } else if (unit_id) {
                         x = strjoina("_SYSTEMD_UNIT=", unit_id);
                         IOVEC_SET_STRING(iovec[n++], x);
                 }
 
 #ifdef HAVE_SELINUX
-                if (mac_selinux_have()) {
+                if (mac_selinux_use()) {
                         if (label) {
                                 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
 
@@ -1023,8 +1068,7 @@ static void dispatch_message_real(
         write_to_journal(s, journal_uid, iovec, n, priority);
 }
 
-void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
-        char mid[11 + 32 + 1];
+void server_driver_message(Server *s, const char *message_id, const char *format, ...) {
         struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
         unsigned n = 0, m;
         int r;
@@ -1042,11 +1086,8 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format,
         assert_cc(6 == LOG_INFO);
         IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
 
-        if (!sd_id128_is_null(message_id)) {
-                snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
-                IOVEC_SET_STRING(iovec[n++], mid);
-        }
-
+        if (message_id)
+                IOVEC_SET_STRING(iovec[n++], message_id);
         m = n;
 
         va_start(ap, format);
@@ -1059,7 +1100,7 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format,
         ucred.gid = getgid();
 
         if (r >= 0)
-                dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
+                dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
 
         while (m < n)
                 free(iovec[m++].iov_base);
@@ -1073,7 +1114,7 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format,
                 n = 3;
                 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
                 IOVEC_SET_STRING(iovec[n++], buf);
-                dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
+                dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0, NULL);
         }
 }
 
@@ -1090,7 +1131,7 @@ void server_dispatch_message(
         int rl, r;
         _cleanup_free_ char *path = NULL;
         uint64_t available = 0;
-        char *c;
+        char *c = NULL;
 
         assert(s);
         assert(iovec || n == 0);
@@ -1129,22 +1170,25 @@ void server_dispatch_message(
                 }
         }
 
-        (void) determine_space(s, false, false, &available, NULL);
+        (void) determine_space(s, &available, NULL);
         rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
         if (rl == 0)
                 return;
 
         /* Write a suppression message if we suppressed something */
         if (rl > 1)
-                server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
+                server_driver_message(s, "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
                                       LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
                                       NULL);
 
 finish:
-        dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
+        /* restore cgroup path for logging */
+        if (c)
+                *c = '/';
+        dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid, path);
 }
 
-int server_flush_to_var(Server *s) {
+int server_flush_to_var(Server *s, bool require_flag_file) {
         sd_id128_t machine;
         sd_journal *j = NULL;
         char ts[FORMAT_TIMESPAN_MAX];
@@ -1154,13 +1198,15 @@ int server_flush_to_var(Server *s) {
 
         assert(s);
 
-        if (s->storage != STORAGE_AUTO &&
-            s->storage != STORAGE_PERSISTENT)
+        if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
                 return 0;
 
         if (!s->runtime_journal)
                 return 0;
 
+        if (require_flag_file && !flushed_flag_is_set())
+                return 0;
+
         (void) system_journal_open(s, true);
 
         if (!s->system_journal)
@@ -1205,7 +1251,7 @@ int server_flush_to_var(Server *s) {
                 }
 
                 server_rotate(s);
-                server_vacuum(s, false, false);
+                server_vacuum(s, false);
 
                 if (!s->system_journal) {
                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
@@ -1233,7 +1279,7 @@ finish:
 
         sd_journal_close(j);
 
-        server_driver_message(s, SD_ID128_NULL,
+        server_driver_message(s, NULL,
                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
                                           n),
@@ -1372,14 +1418,15 @@ static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *
 
         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
 
-        (void) server_flush_to_var(s);
+        (void) server_flush_to_var(s, false);
         server_sync(s);
-        server_vacuum(s, false, false);
+        server_vacuum(s, false);
 
         r = touch("/run/systemd/journal/flushed");
         if (r < 0)
                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
 
+        server_space_usage_message(s, NULL);
         return 0;
 }
 
@@ -1391,7 +1438,12 @@ static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *
 
         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
         server_rotate(s);
-        server_vacuum(s, true, true);
+        server_vacuum(s, true);
+
+        if (s->system_journal)
+                patch_min_use(&s->system_storage);
+        if (s->runtime_journal)
+                patch_min_use(&s->runtime_storage);
 
         /* Let clients know when the most recent rotation happened. */
         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
@@ -1435,7 +1487,7 @@ static int setup_signals(Server *s) {
 
         assert(s);
 
-        assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+        assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
 
         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
         if (r < 0)
@@ -1481,55 +1533,101 @@ static int setup_signals(Server *s) {
         return 0;
 }
 
-static int server_parse_proc_cmdline(Server *s) {
-        _cleanup_free_ char *line = NULL;
-        const char *p;
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+        Server *s = data;
         int r;
 
-        r = proc_cmdline(&line);
-        if (r < 0) {
-                log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
-                return 0;
-        }
+        assert(s);
 
-        p = line;
-        for (;;) {
-                _cleanup_free_ char *word = NULL;
+        if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
 
-                r = extract_first_word(&p, &word, NULL, 0);
+                r = value ? parse_boolean(value) : true;
                 if (r < 0)
-                        return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
+                        log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
+                else
+                        s->forward_to_syslog = r;
 
-                if (r == 0)
-                        break;
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
 
-                if (startswith(word, "systemd.journald.forward_to_syslog=")) {
-                        r = parse_boolean(word + 35);
-                        if (r < 0)
-                                log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
-                        else
-                                s->forward_to_syslog = r;
-                } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
-                        r = parse_boolean(word + 33);
-                        if (r < 0)
-                                log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
-                        else
-                                s->forward_to_kmsg = r;
-                } else if (startswith(word, "systemd.journald.forward_to_console=")) {
-                        r = parse_boolean(word + 36);
-                        if (r < 0)
-                                log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
-                        else
-                                s->forward_to_console = r;
-                } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
-                        r = parse_boolean(word + 33);
-                        if (r < 0)
-                                log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
-                        else
-                                s->forward_to_wall = r;
-                } else if (startswith(word, "systemd.journald"))
-                        log_warning("Invalid systemd.journald parameter. Ignoring.");
-        }
+                r = value ? parse_boolean(value) : true;
+                if (r < 0)
+                        log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
+                else
+                        s->forward_to_kmsg = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
+
+                r = value ? parse_boolean(value) : true;
+                if (r < 0)
+                        log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
+                else
+                        s->forward_to_console = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
+
+                r = value ? parse_boolean(value) : true;
+                if (r < 0)
+                        log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
+                else
+                        s->forward_to_wall = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = log_level_from_string(value);
+                if (r < 0)
+                        log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
+                else
+                        s->max_level_console = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = log_level_from_string(value);
+                if (r < 0)
+                        log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
+                else
+                        s->max_level_store = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = log_level_from_string(value);
+                if (r < 0)
+                        log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
+                else
+                        s->max_level_syslog = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = log_level_from_string(value);
+                if (r < 0)
+                        log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
+                else
+                        s->max_level_kmsg = r;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = log_level_from_string(value);
+                if (r < 0)
+                        log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
+                else
+                        s->max_level_wall = r;
+
+        } else if (startswith(key, "systemd.journald"))
+                log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
 
         /* do not warn about state here, since probably systemd already did */
         return 0;
@@ -1539,10 +1637,10 @@ static int server_parse_config_file(Server *s) {
         assert(s);
 
         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
-                                 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
-                                 "Journal\0",
-                                 config_item_perf_lookup, journald_gperf_lookup,
-                                 false, s);
+                                        CONF_PATHS_NULSTR("systemd/journald.conf.d"),
+                                        "Journal\0",
+                                        config_item_perf_lookup, journald_gperf_lookup,
+                                        false, s);
 }
 
 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
@@ -1817,6 +1915,7 @@ int server_init(Server *s) {
         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
         s->compress = true;
         s->seal = true;
+        s->read_kmsg = true;
 
         s->watchdog_usec = USEC_INFINITY;
 
@@ -1836,11 +1935,14 @@ int server_init(Server *s) {
         s->max_level_console = LOG_INFO;
         s->max_level_wall = LOG_EMERG;
 
-        journal_reset_metrics(&s->system_metrics);
-        journal_reset_metrics(&s->runtime_metrics);
+        journal_reset_metrics(&s->system_storage.metrics);
+        journal_reset_metrics(&s->runtime_storage.metrics);
 
         server_parse_config_file(s);
-        server_parse_proc_cmdline(s);
+
+        r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
+        if (r < 0)
+                log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
@@ -1950,7 +2052,7 @@ int server_init(Server *s) {
         if (r < 0)
                 return r;
 
-        /* /dev/ksmg */
+        /* /dev/kmsg */
         r = server_open_dev_kmsg(s);
         if (r < 0)
                 return r;
@@ -1990,6 +2092,14 @@ int server_init(Server *s) {
         server_cache_boot_id(s);
         server_cache_machine_id(s);
 
+        s->runtime_storage.name = "Runtime journal";
+        s->system_storage.name = "System journal";
+
+        s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
+        s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
+        if (!s->runtime_storage.path || !s->system_storage.path)
+                return -ENOMEM;
+
         (void) server_connect_notify(s);
 
         return system_journal_open(s, false);
@@ -2068,6 +2178,8 @@ void server_done(Server *s) {
         free(s->tty_path);
         free(s->cgroup_root);
         free(s->hostname_field);
+        free(s->runtime_storage.path);
+        free(s->system_storage.path);
 
         if (s->mmap)
                 mmap_cache_unref(s->mmap);