From: Vsevolod Stakhov Date: Fri, 19 Jun 2026 11:28:15 +0000 (+0100) Subject: [Feature] lua: zlib/YARA-compatible crc32 X-Git-Tag: 4.1.1~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b956757ca9557786a366830cde95ecb9b6deeecd;p=thirdparty%2Frspamd.git [Feature] lua: zlib/YARA-compatible crc32 Expose standard CRC-32 (poly 0xEDB88320, init/final 0xFFFFFFFF XOR), computed via zlib crc32() so it is bit-exact with YARA hash.crc32: - rspamd_cryptobox_hash.create_specific("crc32"): streaming update + final, big-endian out so :hex() yields e.g. cbf43926 - rspamd_text:crc32([start[, len]]) and rspamd_util.crc32(input[, start, len]): return the checksum as a Lua integer over a zero-copy 1-based slice (no buffer copy) Tests cover golden values ("" -> 0, "123456789" -> 0xCBF43926), 1-based slicing, streaming-vs-one-shot, reset, and a real attachment-like buffer cross-checked against zlib/YARA crc32. --- diff --git a/src/lua/lua_cryptobox.c b/src/lua/lua_cryptobox.c index da40523581..ef0d4865b7 100644 --- a/src/lua/lua_cryptobox.c +++ b/src/lua/lua_cryptobox.c @@ -34,6 +34,7 @@ #include "unix-std.h" #include "contrib/libottery/ottery.h" #include "libutil/ref.h" +#include <zlib.h> /* for crc32() */ #include #include @@ -50,6 +51,7 @@ enum lua_cryptobox_hash_type { LUA_CRYPTOBOX_HASH_XXHASH3, LUA_CRYPTOBOX_HASH_MUM, LUA_CRYPTOBOX_HASH_T1HA, + LUA_CRYPTOBOX_HASH_CRC32, }; struct rspamd_lua_cryptobox_hash { @@ -62,6 +64,7 @@ struct rspamd_lua_cryptobox_hash { HMAC_CTX *hmac_c; #endif rspamd_cryptobox_fast_hash_state_t *fh; + uint32_t crc; /* running zlib crc32 for LUA_CRYPTOBOX_HASH_CRC32 */ } content; unsigned char out[rspamd_cryptobox_HASHBYTES]; @@ -945,6 +948,22 @@ rspamd_lua_hash_update(struct rspamd_lua_cryptobox_hash *h, case LUA_CRYPTOBOX_HASH_T1HA: rspamd_cryptobox_fast_hash_update(h->content.fh, p, len); break; + case LUA_CRYPTOBOX_HASH_CRC32: { + const unsigned char *cp = (const unsigned char *) p; + gsize remain = len; + uLong crc = h->content.crc; +
/* zlib crc32() takes a uInt length, so feed it in chunks */ + while (remain > 0) { + uInt chunk = remain > G_MAXUINT ? G_MAXUINT : (uInt) remain; + crc = crc32(crc, cp, chunk); + cp += chunk; + remain -= chunk; + } + + h->content.crc = (uint32_t) crc; + break; + } default: g_assert_not_reached(); } @@ -979,6 +998,9 @@ lua_cryptobox_hash_dtor(struct rspamd_lua_cryptobox_hash *h) rspamd_explicit_memzero(h->content.h, sizeof(*h->content.h)); free(h->content.h); /* Allocated by posix_memalign */ } + else if (h->type == LUA_CRYPTOBOX_HASH_CRC32) { + /* Plain value, nothing to free */ + } else { rspamd_cryptobox_fast_hash_free(h->content.fh); } @@ -1147,6 +1169,15 @@ rspamd_lua_hash_create(const char *type, const char *key, gsize keylen) RSPAMD_CRYPTOBOX_T1HA, 0); h->out_len = sizeof(uint64_t); } + else if (g_ascii_strcasecmp(type, "crc32") == 0) { + /* + * Standard CRC-32 (poly 0xEDB88320, init/final 0xFFFFFFFF XOR); + * uses zlib crc32() so it is bit-exact with YARA hash.crc32. + */ + h->type = LUA_CRYPTOBOX_HASH_CRC32; + h->content.crc = (uint32_t) crc32(0L, Z_NULL, 0); + h->out_len = sizeof(uint32_t); + } else if (g_ascii_strcasecmp(type, "blake2") == 0) { rspamd_lua_hash_init_default(h, key, keylen); } @@ -1210,7 +1241,7 @@ lua_cryptobox_hash_create(lua_State *L) /*** * @function rspamd_cryptobox_hash.create_specific(type, [string]) * Creates new hash context - * @param {string} type type of hash (blake2, sha256, md5, sha512, mum, xxh64, xxh32, t1ha) + * @param {string} type type of hash (blake2, sha256, md5, sha512, mum, xxh64, xxh32, t1ha, crc32) * @param {string} string initial data * @return {cryptobox_hash} hash object */ @@ -1618,6 +1649,9 @@ lua_cryptobox_hash_reset(lua_State *L) rspamd_cryptobox_fast_hash_init_specific(h->content.fh, RSPAMD_CRYPTOBOX_T1HA, 0); break; + case LUA_CRYPTOBOX_HASH_CRC32: + h->content.crc = (uint32_t) crc32(0L, Z_NULL, 0); + break; default: g_assert_not_reached(); } @@ -1674,6 +1708,16 @@ lua_cryptobox_hash_finish(struct 
rspamd_lua_cryptobox_hash *h) ll = rspamd_cryptobox_fast_hash_final(h->content.fh); memcpy(h->out, &ll, sizeof(ll)); break; + case LUA_CRYPTOBOX_HASH_CRC32: { + uint32_t crc = h->content.crc; + /* Big-endian, so :hex() yields the canonical crc32 text (e.g. cbf43926) */ + h->out[0] = (crc >> 24) & 0xff; + h->out[1] = (crc >> 16) & 0xff; + h->out[2] = (crc >> 8) & 0xff; + h->out[3] = crc & 0xff; + h->out_len = sizeof(uint32_t); + break; + } default: g_assert_not_reached(); } diff --git a/src/lua/lua_text.c b/src/lua/lua_text.c index 6a390d7796..2201d102ed 100644 --- a/src/lua/lua_text.c +++ b/src/lua/lua_text.c @@ -18,6 +18,7 @@ #include "libcryptobox/cryptobox.h" #include "rspamd_simdutf.h" #include "unix-std.h" +#include <zlib.h> /* for crc32() */ /*** * @module rspamd_text @@ -242,6 +243,18 @@ LUA_FUNCTION_DEF(text, base64); * @return {rspamd_text} new text encoded in hex */ LUA_FUNCTION_DEF(text, hex); +/*** + * @method rspamd_text:crc32([start[, len]]) + * Returns the CRC-32 checksum of the byte range starting at `start` + * (1-indexed, default 1) and spanning `len` bytes (default: to the end of the + * text). The range is read in place (no copy). This is the standard CRC-32 + * (zlib crc32 / YARA hash.crc32, polynomial 0xEDB88320) and the result is a Lua + * integer suitable for numeric comparison (e.g. `t:crc32() == 0xCBF43926`). + * @param {integer} start 1-based start index + * @param {integer} len number of bytes + * @return {integer} crc32 value as an unsigned 32-bit integer + */ +LUA_FUNCTION_DEF(text, crc32); /*** * @method rspamd_text:find(pattern [, init]) * Looks for the first match of pattern in the string s.
@@ -295,6 +308,7 @@ static const struct luaL_reg textlib_m[] = { LUA_INTERFACE_DEF(text, base32), LUA_INTERFACE_DEF(text, base64), LUA_INTERFACE_DEF(text, hex), + LUA_INTERFACE_DEF(text, crc32), LUA_INTERFACE_DEF(text, find), LUA_INTERFACE_DEF(text, strtoul), {"write", lua_text_save_in_file}, @@ -1582,6 +1596,55 @@ lua_text_hex(lua_State *L) return 1; } +static int +lua_text_crc32(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + int64_t start = 1, len = -1; + + if (t == NULL) { + return luaL_error(L, "invalid arguments, text required"); + } + + if (lua_isnumber(L, 2)) { + start = lua_tointeger(L, 2); + } + if (lua_isnumber(L, 3)) { + len = lua_tointeger(L, 3); + } + + if (start < 1 || (start - 1) > t->len) { + return luaL_error(L, "invalid start offset %d (text len %d)", + (int) start, (int) t->len); + } + + if (len == -1) { + len = t->len - (start - 1); + } + + if (len < 0 || len > (t->len - (start - 1))) { + return luaL_error(L, "invalid length"); + } + + /* Standard CRC-32 (poly 0xEDB88320), bit-exact with zlib/YARA hash.crc32 */ + const unsigned char *p = (const unsigned char *) t->start + (start - 1); + gsize remain = (gsize) len; + uLong crc = crc32(0L, Z_NULL, 0); + + /* zlib crc32() takes a uInt length, so feed the slice in chunks */ + while (remain > 0) { + uInt chunk = remain > G_MAXUINT ? 
G_MAXUINT : (uInt) remain; + crc = crc32(crc, p, chunk); + p += chunk; + remain -= chunk; + } + + lua_pushinteger(L, (lua_Integer) (uint32_t) crc); + + return 1; +} + static int lua_text_find(lua_State *L) { diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index cdbcab02ac..9aecbdc362 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -22,6 +22,7 @@ #include "libmime/mime_headers.h" #include "libutil/hash.h" #include "libutil/str_util.h" +#include <zlib.h> /* for crc32() */ #include "libserver/html/html.h" #include "libserver/hyperscan_tools.h" #include "libserver/async_session.h" @@ -714,6 +715,20 @@ LUA_FUNCTION_DEF(util, caseless_hash); */ LUA_FUNCTION_DEF(util, caseless_hash_fast); +/*** + * @function util.crc32(input[, start[, len]]) + * Calculates the CRC-32 checksum of a string or rspamd_text, optionally over the + * byte range starting at `start` (1-indexed, default 1) and spanning `len` bytes + * (default: to the end). The range is read in place (no copy). This is the + * standard CRC-32 (zlib crc32 / YARA hash.crc32, polynomial 0xEDB88320) and the + * result is a Lua integer suitable for numeric comparison.
+ * @param {string|text} input data to checksum + * @param {integer} start 1-based start index + * @param {integer} len number of bytes + * @return {integer} crc32 value as an unsigned 32-bit integer + */ +LUA_FUNCTION_DEF(util, crc32); + /*** * @function util.get_hostname() * Returns hostname for this machine @@ -836,6 +851,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF(util, normalize_prob), LUA_INTERFACE_DEF(util, caseless_hash), LUA_INTERFACE_DEF(util, caseless_hash_fast), + LUA_INTERFACE_DEF(util, crc32), LUA_INTERFACE_DEF(util, is_utf_spoofed), LUA_INTERFACE_DEF(util, is_utf_mixed_script), LUA_INTERFACE_DEF(util, is_utf_outside_range), @@ -2439,6 +2455,57 @@ lua_util_caseless_hash_fast(lua_State *L) return 1; } +static int +lua_util_crc32(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = NULL; + int64_t start = 1, len = -1; + + t = lua_check_text_or_string(L, 1); + + if (t == NULL || t->start == NULL) { + return luaL_error(L, "invalid arguments"); + } + + if (lua_isnumber(L, 2)) { + start = lua_tointeger(L, 2); + } + if (lua_isnumber(L, 3)) { + len = lua_tointeger(L, 3); + } + + if (start < 1 || (start - 1) > t->len) { + return luaL_error(L, "invalid start offset %d (input len %d)", + (int) start, (int) t->len); + } + + if (len == -1) { + len = t->len - (start - 1); + } + + if (len < 0 || len > (t->len - (start - 1))) { + return luaL_error(L, "invalid length"); + } + + /* Standard CRC-32 (poly 0xEDB88320), bit-exact with zlib/YARA hash.crc32 */ + const unsigned char *p = (const unsigned char *) t->start + (start - 1); + gsize remain = (gsize) len; + uLong crc = crc32(0L, Z_NULL, 0); + + /* zlib crc32() takes a uInt length, so feed the slice in chunks */ + while (remain > 0) { + uInt chunk = remain > G_MAXUINT ? 
G_MAXUINT : (uInt) remain; + crc = crc32(crc, p, chunk); + p += chunk; + remain -= chunk; + } + + lua_pushinteger(L, (lua_Integer) (uint32_t) crc); + + return 1; +} + static int lua_util_is_utf_spoofed(lua_State *L) { diff --git a/test/lua/unit/crc32.lua b/test/lua/unit/crc32.lua new file mode 100644 index 0000000000..e608485e02 --- /dev/null +++ b/test/lua/unit/crc32.lua @@ -0,0 +1,100 @@ +-- CRC-32 tests: rspamd_text:crc32, rspamd_util.crc32 and +-- rspamd_cryptobox_hash.create_specific("crc32"). +-- +-- Golden values are the standard CRC-32 (zlib crc32 / YARA hash.crc32, +-- polynomial 0xEDB88320). The "real attachment buffer" values were produced +-- with Python's zlib.crc32 (identical to YARA hash.crc32) over the exact same +-- bytes constructed below. + +context("CRC-32", function() + local rspamd_text = require "rspamd_text" + local rspamd_util = require "rspamd_util" + local hash = require "rspamd_cryptobox_hash" + + local function T(s) + return rspamd_text.fromstring(s) + end + + -- A deterministic, attachment-like binary buffer: + -- 15-byte PDF-ish header + 8 copies of all 256 byte values + footer. + local function sample_buffer() + local bytes = {} + for i = 0, 255 do + bytes[#bytes + 1] = string.char(i) + end + local block = string.rep(table.concat(bytes), 8) -- 2048 bytes + return "%PDF-1.7\n%\226\227\207\211\n" .. block .. 
"\nendstream endobj\n" + end + + test("text:crc32 golden values", function() + assert_equal(T(""):crc32(), 0x00000000) + assert_equal(T("123456789"):crc32(), 0xCBF43926) + end) + + test("util.crc32 golden values (string and text input)", function() + assert_equal(rspamd_util.crc32(""), 0x00000000) + assert_equal(rspamd_util.crc32("123456789"), 0xCBF43926) + assert_equal(rspamd_util.crc32(T("123456789")), 0xCBF43926) + end) + + test("create_specific('crc32') == zlib/YARA crc32", function() + -- via initial data + assert_equal(hash.create_specific("crc32", "123456789"):hex(), "cbf43926") + -- empty input + assert_equal(hash.create_specific("crc32"):hex(), "00000000") + end) + + test("create_specific('crc32') streaming matches one-shot", function() + local h = hash.create_specific("crc32") + h:update("123") + h:update("456") + h:update("789") + assert_equal(h:hex(), "cbf43926") + -- hex form must match the integer form of the text method + assert_equal(tonumber(h:hex(), 16), T("123456789"):crc32()) + end) + + test("create_specific('crc32'):reset re-initialises the state", function() + local h = hash.create_specific("crc32", "123456789") + assert_equal(h:hex(), "cbf43926") + h:reset() + h:update("123456789") + assert_equal(h:hex(), "cbf43926") + end) + + test("text:crc32 1-based start/len slicing", function() + -- "234" is the 3-byte slice of "123456789" starting at position 2 + assert_equal(T("123456789"):crc32(2, 3), 0x0D717969) + -- self-consistency: slice crc == crc of the same bytes on their own + assert_equal(T("123456789"):crc32(2, 3), T("234"):crc32()) + -- default len runs to the end + assert_equal(T("123456789"):crc32(1), 0xCBF43926) + assert_equal(T("123456789"):crc32(1, 9), 0xCBF43926) + end) + + test("util.crc32 1-based start/len slicing matches text method", function() + local s = "123456789" + assert_equal(rspamd_util.crc32(s, 2, 3), T(s):crc32(2, 3)) + assert_equal(rspamd_util.crc32(s, 4), T(s):crc32(4)) + end) + + test("real attachment buffer 
cross-checked against zlib/YARA crc32", function() + local buf = sample_buffer() + assert_equal(#buf, 2081) + local t = T(buf) + -- whole buffer + assert_equal(t:crc32(), 0xD0A1329A) + assert_equal(rspamd_util.crc32(buf), 0xD0A1329A) + assert_equal(tonumber(hash.create_specific("crc32", t):hex(), 16), 0xD0A1329A) + -- a 40-byte interior slice (Python buf[10:50] == 1-based start=11, len=40) + assert_equal(t:crc32(11, 40), 0x2F95E096) + assert_equal(rspamd_util.crc32(buf, 11, 40), 0x2F95E096) + end) + + test("text:crc32 rejects out-of-range arguments", function() + local t = T("123456789") + assert_false(pcall(function() t:crc32(0) end)) -- start < 1 + assert_false(pcall(function() t:crc32(11) end)) -- start past end+1 + assert_false(pcall(function() t:crc32(1, 100) end)) -- len too large + end) +end)