]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
test: append to corrupted journals 28233/head
author Frantisek Sumsal <frantisek@sumsal.cz>
Sat, 13 May 2023 15:39:35 +0000 (17:39 +0200)
committer Frantisek Sumsal <frantisek@sumsal.cz>
Thu, 6 Jul 2023 20:41:27 +0000 (22:41 +0200)
Introduce a manual test tool that creates a journal, corrupts it by
flipping bits at given offsets, and then attempts to write to the journal.
In the ideal case, we should handle this gracefully, without any crash or
memory corruption.

src/journal/meson.build
src/journal/test-journal-append.c [new file with mode: 0644]
test/units/testsuite-04.journal-append.sh [new file with mode: 0755]

index c540a1ce45371559ea37385e9274f90e407aef78..a6837d23c54a7e578a438e6042f2be3f705ab70c 100644 (file)
@@ -111,6 +111,11 @@ tests += [
                 'sources' : files('test-journal.c'),
                 'base' : test_journal_base,
         },
+        {
+                'sources' : files('test-journal-append.c'),
+                'base' : test_journal_base,
+                'type' : 'manual',
+        },
 ]
 
 fuzzer_journald_base = {
diff --git a/src/journal/test-journal-append.c b/src/journal/test-journal-append.c
new file mode 100644 (file)
index 0000000..84fa4d5
--- /dev/null
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "managed-journal-file.h"
+#include "mmap-cache.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+/* Appends one entry consisting of the single field 'message' (e.g. "MESSAGE=...") to the journal
+ * behind 'mj', stamped with the current time. Returns the result of journal_file_append_entry()
+ * unchanged; callers in this file treat a negative value as failure. */
+static int journal_append_message(ManagedJournalFile *mj, const char *message) {
+        struct dual_timestamp now;
+        struct iovec field;
+
+        assert(mj);
+        assert(message);
+
+        /* The entry carries exactly one field: the caller-provided string */
+        field = IOVEC_MAKE_STRING(message);
+        dual_timestamp_get(&now);
+
+        return journal_file_append_entry(
+                                mj->file,
+                                &now,
+                                /* boot_id= */ NULL,
+                                &field,
+                                /* n_iovec= */ 1,
+                                /* seqnum= */ NULL,
+                                /* seqnum_id= */ NULL,
+                                /* ret_object= */ NULL,
+                                /* ret_offset= */ NULL);
+}
+
+/* Opens (creating it if needed) "system.journal" relative to the current working directory, using
+ * the one set of parameters this test uses for every open. Returns the result of
+ * managed_journal_file_open() unchanged. */
+static int journal_open_in_cwd(MMapCache *mmap_cache, ManagedJournalFile **ret) {
+        assert(mmap_cache);
+        assert(ret);
+
+        return managed_journal_file_open(
+                                /* fd= */ -1,
+                                "system.journal",
+                                O_RDWR|O_CREAT,
+                                JOURNAL_COMPRESS,
+                                0644,
+                                /* compress_threshold_bytes= */ UINT64_MAX,
+                                /* metrics= */ NULL,
+                                mmap_cache,
+                                /* deferred_closes= */ NULL,
+                                /* template= */ NULL,
+                                ret);
+}
+
+/* Creates a fresh journal in a temporary directory, writes ten initial entries, and then walks the
+ * file from 'start_offset' (a random offset if UINT64_MAX is passed) in increments of 'step' bytes.
+ * At every visited offset the lowest bit of the byte is flipped, the journal is closed and
+ * reopened, and one more entry is appended.
+ *
+ * Returns 0 if the walk completed or stopped early because the corrupted journal was rejected on
+ * reopen or append (only crashes and sanitizer reports count as failures here), a negative
+ * errno-style value on setup errors or invalid arguments, and the return value of
+ * log_tests_skipped() if no machine ID is available. */
+static int journal_corrupt_and_append(uint64_t start_offset, uint64_t step) {
+        _cleanup_(mmap_cache_unrefp) MMapCache *mmap_cache = NULL;
+        _cleanup_(rm_rf_physical_and_freep) char *tempdir = NULL;
+        _cleanup_(managed_journal_file_closep) ManagedJournalFile *mj = NULL;
+        uint64_t start, end;
+        int r;
+
+        /* With a zero step the loop below would keep hitting the same byte instead of walking
+         * through the file */
+        if (step == 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Corrupt step must be greater than zero");
+
+        mmap_cache = mmap_cache_new();
+        assert_se(mmap_cache);
+
+        /* managed_journal_file_open() requires a valid machine id */
+        if (sd_id128_get_machine(NULL) < 0)
+                return log_tests_skipped("No valid machine ID found");
+
+        /* Work inside a private temporary directory; it is removed again by the cleanup handler */
+        assert_se(mkdtemp_malloc("/tmp/journal-append-XXXXXX", &tempdir) >= 0);
+        assert_se(chdir(tempdir) >= 0);
+        (void) chattr_path(tempdir, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+        log_debug("Opening journal %s/system.journal", tempdir);
+
+        r = journal_open_in_cwd(mmap_cache, &mj);
+        if (r < 0)
+                return log_error_errno(r, "Failed to open the journal: %m");
+
+        assert_se(mj);
+        assert_se(mj->file);
+
+        /* Add a couple of initial messages */
+        for (int i = 0; i < 10; i++) {
+                _cleanup_free_ char *message = NULL;
+
+                assert_se(asprintf(&message, "MESSAGE=Initial message %d", i) >= 0);
+                r = journal_append_message(mj, message);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to write to the journal: %m");
+        }
+
+        /* The walk is bounded by the file size recorded at this point */
+        end = (uint64_t) mj->file->last_stat.st_size;
+        if (start_offset != UINT64_MAX)
+                start = start_offset;
+        else
+                /* Guard the modulo against an empty file; start >= end is rejected below */
+                start = end > 0 ? random_u64() % end : 0;
+
+        /* Print the initial offset at which we start flipping bits, which can be
+         * later used to reproduce a potential fail */
+        log_info("Start offset: %" PRIu64 ", corrupt-step: %" PRIu64, start, step);
+        fflush(stdout);
+
+        if (start >= end)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Start offset >= journal size, can't continue");
+
+        for (uint64_t offset = start; offset < end; offset += step) {
+                _cleanup_free_ char *message = NULL;
+                ssize_t n;
+                uint8_t b;
+
+                /* Flip the lowest bit of the byte at this offset. XOR guarantees the byte really
+                 * changes; OR-ing the bit in would leave bytes that already have it set intact. */
+                n = pread(mj->file->fd, &b, 1, offset);
+                assert_se(n == 1);
+                b ^= 0x1;
+                n = pwrite(mj->file->fd, &b, 1, offset);
+                assert_se(n == 1);
+
+                /* Close and reopen the journal to flush all caches and remap
+                 * the corrupted journal */
+                mj = managed_journal_file_close(mj);
+                r = journal_open_in_cwd(mmap_cache, &mj);
+                if (r < 0) {
+                        /* The corrupted journal might get rejected during reopening
+                         * if it's corrupted enough (especially its header), so
+                         * treat this as a success if it doesn't crash */
+                        log_info_errno(r, "Failed to reopen the journal: %m");
+                        break;
+                }
+
+                /* Try to write something to the (possibly corrupted) journal */
+                assert_se(asprintf(&message, "MESSAGE=Hello world %" PRIu64, offset) >= 0);
+                r = journal_append_message(mj, message);
+                if (r < 0) {
+                        /* We care only about crashes or sanitizer errors,
+                         * failed write without any crash is a success */
+                        log_info_errno(r, "Failed to write to the journal: %m");
+                        break;
+                }
+
+                /* The next offset would be at or past the end: stop here, so that
+                 * 'offset += step' cannot wrap around for very large steps */
+                if (step >= end - offset)
+                        break;
+        }
+
+        return 0;
+}
+
+/* Entry point of the manual test tool.
+ *
+ * Parses the command line and then either
+ *   - runs a single corruption pass at the given offset (--run-one=OFFSET), or
+ *   - runs up to --iterations= passes, each starting at a random offset or, with --sequential,
+ *     at (start offset or 0) + iteration * iteration-step.
+ *
+ * Exit status: EXIT_SUCCESS on success, EXIT_FAILURE on invalid arguments or when a pass reports
+ * an error. In --run-one mode a non-negative result of the pass is returned as is. */
+int main(int argc, char *argv[]) {
+        uint64_t start_offset = UINT64_MAX;
+        uint64_t iterations = 100;
+        uint64_t iteration_step = 1;
+        uint64_t corrupt_step = 31;
+        bool sequential = false, run_one = false;
+        int c, r;
+
+        test_setup_logging(LOG_DEBUG);
+
+        enum {
+                ARG_START_OFFSET = 0x1000,
+                ARG_ITERATIONS,
+                ARG_ITERATION_STEP,
+                ARG_CORRUPT_STEP,
+                ARG_SEQUENTIAL,
+                ARG_RUN_ONE,
+        };
+
+        static const struct option options[] = {
+                { "help",                no_argument,       NULL, 'h'                     },
+                { "start-offset",        required_argument, NULL, ARG_START_OFFSET        },
+                { "iterations",          required_argument, NULL, ARG_ITERATIONS          },
+                { "iteration-step",      required_argument, NULL, ARG_ITERATION_STEP      },
+                { "corrupt-step",        required_argument, NULL, ARG_CORRUPT_STEP        },
+                { "sequential",          no_argument,       NULL, ARG_SEQUENTIAL          },
+                { "run-one",             required_argument, NULL, ARG_RUN_ONE             },
+                {}
+        };
+
+        assert_se(argc >= 0);
+        assert_se(argv);
+
+        /* Note: every error path below returns EXIT_FAILURE rather than a negative errno, since
+         * the value returned from main() becomes the process exit status */
+        while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+                switch (c) {
+
+                case 'h':
+                        printf("Syntax:\n"
+                               "  %s [OPTION...]\n"
+                               "Options:\n"
+                               "    --start-offset=OFFSET   Offset at which to start corrupting the journal\n"
+                               "                            (default: random offset is picked, unless\n"
+                               "                            --sequential is used - in that case we use 0 + iteration)\n"
+                               "    --iterations=ITER       Number of iterations to perform before exiting\n"
+                               "                            (default: 100)\n"
+                               "    --iteration-step=STEP   Iteration step (default: 1)\n"
+                               "    --corrupt-step=STEP     Corrupt every n-th byte starting from OFFSET (default: 31)\n"
+                               "    --sequential            Go through offsets sequentially instead of picking\n"
+                               "                            a random one on each iteration. If set, we go through\n"
+                               "                            offsets <0; ITER), or <OFFSET, ITER) if --start-offset=\n"
+                               "                            is set (default: false)\n"
+                               "    --run-one=OFFSET        Single shot mode for reproducing issues. Takes the same\n"
+                               "                            offset as --start-offset= and does only one iteration\n"
+                               , program_invocation_short_name);
+                        return 0;
+
+                case ARG_START_OFFSET:
+                        r = safe_atou64(optarg, &start_offset);
+                        if (r < 0) {
+                                log_error_errno(r, "Invalid starting offset: %m");
+                                return EXIT_FAILURE;
+                        }
+                        break;
+
+                case ARG_ITERATIONS:
+                        r = safe_atou64(optarg, &iterations);
+                        if (r < 0) {
+                                log_error_errno(r, "Invalid value for iterations: %m");
+                                return EXIT_FAILURE;
+                        }
+                        break;
+
+                case ARG_CORRUPT_STEP:
+                        r = safe_atou64(optarg, &corrupt_step);
+                        if (r < 0) {
+                                log_error_errno(r, "Invalid value for corrupt-step: %m");
+                                return EXIT_FAILURE;
+                        }
+                        /* A zero step would never advance through the journal file */
+                        if (corrupt_step == 0) {
+                                log_error("Invalid value for corrupt-step: must be greater than zero");
+                                return EXIT_FAILURE;
+                        }
+                        break;
+
+                case ARG_ITERATION_STEP:
+                        r = safe_atou64(optarg, &iteration_step);
+                        if (r < 0) {
+                                log_error_errno(r, "Invalid value for iteration-step: %m");
+                                return EXIT_FAILURE;
+                        }
+                        break;
+
+                case ARG_SEQUENTIAL:
+                        sequential = true;
+                        break;
+
+                case ARG_RUN_ONE:
+                        /* Shares the offset variable with --start-offset= */
+                        r = safe_atou64(optarg, &start_offset);
+                        if (r < 0) {
+                                log_error_errno(r, "Invalid offset: %m");
+                                return EXIT_FAILURE;
+                        }
+
+                        run_one = true;
+                        break;
+
+                case '?':
+                        /* Unknown option or missing argument; getopt_long() already printed a message */
+                        return EXIT_FAILURE;
+
+                default:
+                        assert_not_reached();
+        }
+
+        if (run_one) {
+                /* Reproducer mode: a single pass at the requested offset */
+                r = journal_corrupt_and_append(start_offset, corrupt_step);
+                return r < 0 ? EXIT_FAILURE : r;
+        }
+
+        for (uint64_t i = 0; i < iterations; i++) {
+                /* UINT64_MAX asks journal_corrupt_and_append() to pick a random start offset */
+                uint64_t offset = UINT64_MAX;
+
+                log_info("Iteration #%" PRIu64 ", step: %" PRIu64, i, iteration_step);
+
+                if (sequential)
+                        offset = (start_offset == UINT64_MAX ? 0 : start_offset) + i * iteration_step;
+
+                r = journal_corrupt_and_append(offset, corrupt_step);
+                if (r < 0)
+                        return EXIT_FAILURE;
+                if (r > 0)
+                        /* The pass did not run (the only positive result it produces is the
+                         * "test skipped" one), so further iterations are pointless */
+                        break;
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/test/units/testsuite-04.journal-append.sh b/test/units/testsuite-04.journal-append.sh
new file mode 100755 (executable)
index 0000000..35f9433
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+# test-journal-append corrupts the journal file by flipping a bit at a given offset and
+# following it by a write to check if we handle appending messages to corrupted journals
+# gracefully
+
+TEST_JOURNAL_APPEND=/usr/lib/systemd/tests/unit-tests/manual/test-journal-append
+
+# Fail right away (set -e) if the test binary is missing or not executable
+[[ -x "$TEST_JOURNAL_APPEND" ]]
+
+# Corrupt the first ~1024 bytes, this should be pretty quick
+# (350 iterations with a step of 3 cover start offsets 0 through 1047)
+"$TEST_JOURNAL_APPEND" --sequential --start-offset=0 --iterations=350 --iteration-step=3
+
+# Skip most of the test when running without acceleration, as it's excruciatingly slow
+# (this shouldn't be an issue, as it should run in nspawn as well)
+if ! [[ "$(systemd-detect-virt -v)" == "qemu" ]]; then
+    # Corrupt the beginning of every 1K block between 1K - 32K
+    for ((i = 1024; i <= (32 * 1024); i += 1024)); do
+        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
+    done
+
+    # Corrupt the beginning of every 16K block between 32K - 256K
+    for ((i = (32 * 1024); i <= (256 * 1024); i += (16 * 1024))); do
+        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
+    done
+
+    # Corrupt the beginning of every 128K block between 128K - 1M
+    for ((i = (128 * 1024); i <= (1 * 1024 * 1024); i += (128 * 1024))); do
+        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
+    done
+
+    # And finally the beginning of every 1M block between 1M and 8M
+    # (the upper bound is exclusive: the last start offset is 7M)
+    for ((i = (1 * 1024 * 1024); i < (8 * 1024 * 1024); i += (1 * 1024 * 1024))); do
+        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
+    done
+
+    # The randomized pass is run only on machines with at least two CPUs
+    if [[ "$(nproc)" -ge 2 ]]; then
+        # Try to corrupt random bytes throughout the journal
+        "$TEST_JOURNAL_APPEND" --iterations=25
+    fi
+else
+    # Detected virtualization is "qemu": run only a short randomized pass
+    "$TEST_JOURNAL_APPEND" --iterations=10
+fi