#include "unix-std.h"
#include "contrib/libottery/ottery.h"
#include "libutil/ref.h"
+#include <zlib.h> /* for crc32() */
#include <stdalign.h>
#include <openssl/hmac.h>
LUA_CRYPTOBOX_HASH_XXHASH3,
LUA_CRYPTOBOX_HASH_MUM,
LUA_CRYPTOBOX_HASH_T1HA,
+ LUA_CRYPTOBOX_HASH_CRC32,
};
struct rspamd_lua_cryptobox_hash {
HMAC_CTX *hmac_c;
#endif
rspamd_cryptobox_fast_hash_state_t *fh;
+ uint32_t crc; /* running zlib crc32 for LUA_CRYPTOBOX_HASH_CRC32 */
} content;
unsigned char out[rspamd_cryptobox_HASHBYTES];
case LUA_CRYPTOBOX_HASH_T1HA:
rspamd_cryptobox_fast_hash_update(h->content.fh, p, len);
break;
+ case LUA_CRYPTOBOX_HASH_CRC32: {
+ const unsigned char *cp = (const unsigned char *) p;
+ gsize remain = len;
+ uLong crc = h->content.crc;
+
+ /* zlib crc32() takes a uInt length, so feed it in chunks */
+ while (remain > 0) {
+ uInt chunk = remain > G_MAXUINT ? G_MAXUINT : (uInt) remain;
+ crc = crc32(crc, cp, chunk);
+ cp += chunk;
+ remain -= chunk;
+ }
+
+ h->content.crc = (uint32_t) crc;
+ break;
+ }
default:
g_assert_not_reached();
}
rspamd_explicit_memzero(h->content.h, sizeof(*h->content.h));
free(h->content.h); /* Allocated by posix_memalign */
}
+ else if (h->type == LUA_CRYPTOBOX_HASH_CRC32) {
+ /* Plain value, nothing to free */
+ }
else {
rspamd_cryptobox_fast_hash_free(h->content.fh);
}
RSPAMD_CRYPTOBOX_T1HA, 0);
h->out_len = sizeof(uint64_t);
}
+ else if (g_ascii_strcasecmp(type, "crc32") == 0) {
+ /*
+ * Standard CRC-32 (poly 0xEDB88320, init/final 0xFFFFFFFF XOR);
+ * uses zlib crc32() so it is bit-exact with YARA hash.crc32.
+ */
+ h->type = LUA_CRYPTOBOX_HASH_CRC32;
+ h->content.crc = (uint32_t) crc32(0L, Z_NULL, 0);
+ h->out_len = sizeof(uint32_t);
+ }
else if (g_ascii_strcasecmp(type, "blake2") == 0) {
rspamd_lua_hash_init_default(h, key, keylen);
}
/***
* @function rspamd_cryptobox_hash.create_specific(type, [string])
* Creates new hash context
- * @param {string} type type of hash (blake2, sha256, md5, sha512, mum, xxh64, xxh32, t1ha)
+ * @param {string} type type of hash (blake2, sha256, md5, sha512, mum, xxh64, xxh32, t1ha, crc32)
* @param {string} string initial data
* @return {cryptobox_hash} hash object
*/
rspamd_cryptobox_fast_hash_init_specific(h->content.fh,
RSPAMD_CRYPTOBOX_T1HA, 0);
break;
+ case LUA_CRYPTOBOX_HASH_CRC32:
+ h->content.crc = (uint32_t) crc32(0L, Z_NULL, 0);
+ break;
default:
g_assert_not_reached();
}
ll = rspamd_cryptobox_fast_hash_final(h->content.fh);
memcpy(h->out, &ll, sizeof(ll));
break;
+ case LUA_CRYPTOBOX_HASH_CRC32: {
+ uint32_t crc = h->content.crc;
+ /* Big-endian, so :hex() yields the canonical crc32 text (e.g. cbf43926) */
+ h->out[0] = (crc >> 24) & 0xff;
+ h->out[1] = (crc >> 16) & 0xff;
+ h->out[2] = (crc >> 8) & 0xff;
+ h->out[3] = crc & 0xff;
+ h->out_len = sizeof(uint32_t);
+ break;
+ }
default:
g_assert_not_reached();
}
#include "libcryptobox/cryptobox.h"
#include "rspamd_simdutf.h"
#include "unix-std.h"
+#include <zlib.h> /* for crc32() */
/***
* @module rspamd_text
* @return {rspamd_text} new text encoded in hex
*/
LUA_FUNCTION_DEF(text, hex);
+/***
+ * @method rspamd_text:crc32([start[, len]])
+ * Returns the CRC-32 checksum of the byte range starting at `start`
+ * (1-indexed, default 1) and spanning `len` bytes (default: to the end of the
+ * text). The range is read in place (no copy). This is the standard CRC-32
+ * (zlib crc32 / YARA hash.crc32, polynomial 0xEDB88320) and the result is a Lua
+ * integer suitable for numeric comparison (e.g. `t:crc32() == 0xCBF43926`).
+ * @param {integer} start optional 1-based start index (default 1)
+ * @param {integer} len optional number of bytes (default: up to the end of the text)
+ * @return {integer} crc32 value as an unsigned 32-bit integer
+ */
+LUA_FUNCTION_DEF(text, crc32);
/***
* @method rspamd_text:find(pattern [, init])
* Looks for the first match of pattern in the string s.
LUA_INTERFACE_DEF(text, base32),
LUA_INTERFACE_DEF(text, base64),
LUA_INTERFACE_DEF(text, hex),
+ LUA_INTERFACE_DEF(text, crc32),
LUA_INTERFACE_DEF(text, find),
LUA_INTERFACE_DEF(text, strtoul),
{"write", lua_text_save_in_file},
return 1;
}
+/*
+ * Lua method rspamd_text:crc32([start[, len]]).
+ *
+ * Computes the zlib crc32() of a byte range of the text at stack index 1 and
+ * pushes it as a single Lua number. `start` is 1-based and defaults to 1;
+ * `len` defaults to "everything from start to the end of the text" (passing
+ * -1 explicitly selects the same default). Raises a Lua error when the text
+ * argument is missing or the requested range does not fit inside the text.
+ */
+static int
+lua_text_crc32(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_lua_text *t = lua_check_text(L, 1);
+	int64_t start = 1, len = -1;
+
+	if (t == NULL) {
+		return luaL_error(L, "invalid arguments, text required");
+	}
+
+	if (lua_isnumber(L, 2)) {
+		start = lua_tointeger(L, 2);
+	}
+	if (lua_isnumber(L, 3)) {
+		len = lua_tointeger(L, 3);
+	}
+
+	/* start == t->len + 1 is accepted and denotes an empty slice at the end */
+	if (start < 1 || (start - 1) > t->len) {
+		return luaL_error(L, "invalid start offset %d (text len %d)",
+						  (int) start, (int) t->len);
+	}
+
+	if (len == -1) {
+		/* -1 is the "not given" sentinel: take everything up to the end */
+		len = t->len - (start - 1);
+	}
+
+	if (len < 0 || len > (t->len - (start - 1))) {
+		return luaL_error(L, "invalid length");
+	}
+
+	/* Standard CRC-32 (poly 0xEDB88320), bit-exact with zlib/YARA hash.crc32 */
+	const unsigned char *p = (const unsigned char *) t->start + (start - 1);
+	gsize remain = (gsize) len;
+	uLong crc = crc32(0L, Z_NULL, 0);
+
+	/* zlib crc32() takes a uInt length, so feed the slice in chunks */
+	while (remain > 0) {
+		uInt chunk = remain > G_MAXUINT ? G_MAXUINT : (uInt) remain;
+		crc = crc32(crc, p, chunk);
+		p += chunk;
+		remain -= chunk;
+	}
+
+	/*
+	 * lua_Integer may be only 32 bits wide (it is ptrdiff_t on Lua 5.1 and
+	 * LuaJIT, hence 32-bit signed on 32-bit targets). Casting a CRC with the
+	 * top bit set to such a type yields a negative value and breaks numeric
+	 * comparisons like `t:crc32() == 0xCBF43926`. Fall back to lua_Number
+	 * there; a double represents every uint32_t exactly.
+	 */
+	if (sizeof(lua_Integer) > sizeof(uint32_t)) {
+		lua_pushinteger(L, (lua_Integer) (uint32_t) crc);
+	}
+	else {
+		lua_pushnumber(L, (lua_Number) (uint32_t) crc);
+	}
+
+	return 1;
+}
+
+
static int
lua_text_find(lua_State *L)
{
#include "libmime/mime_headers.h"
#include "libutil/hash.h"
#include "libutil/str_util.h"
+#include <zlib.h> /* for crc32() */
#include "libserver/html/html.h"
#include "libserver/hyperscan_tools.h"
#include "libserver/async_session.h"
*/
LUA_FUNCTION_DEF(util, caseless_hash_fast);
+/***
+ * @function util.crc32(input[, start[, len]])
+ * Calculates the CRC-32 checksum of a string or rspamd_text, optionally over the
+ * byte range starting at `start` (1-indexed, default 1) and spanning `len` bytes
+ * (default: to the end). The range is read in place (no copy). This is the
+ * standard CRC-32 (zlib crc32 / YARA hash.crc32, polynomial 0xEDB88320) and the
+ * result is a Lua integer suitable for numeric comparison.
+ * @param {string|text} input data to checksum
+ * @param {integer} start optional 1-based start index (default 1)
+ * @param {integer} len optional number of bytes (default: up to the end of the input)
+ * @return {integer} crc32 value as an unsigned 32-bit integer
+ */
+LUA_FUNCTION_DEF(util, crc32);
+
/***
* @function util.get_hostname()
* Returns hostname for this machine
LUA_INTERFACE_DEF(util, normalize_prob),
LUA_INTERFACE_DEF(util, caseless_hash),
LUA_INTERFACE_DEF(util, caseless_hash_fast),
+ LUA_INTERFACE_DEF(util, crc32),
LUA_INTERFACE_DEF(util, is_utf_spoofed),
LUA_INTERFACE_DEF(util, is_utf_mixed_script),
LUA_INTERFACE_DEF(util, is_utf_outside_range),
return 1;
}
+/*
+ * Lua function util.crc32(input[, start[, len]]).
+ *
+ * Computes the zlib crc32() of a byte range of the string or rspamd_text at
+ * stack index 1 and pushes it as a single Lua number. `start` is 1-based and
+ * defaults to 1; `len` defaults to "everything from start to the end of the
+ * input" (passing -1 explicitly selects the same default). Raises a Lua error
+ * when the input is missing, has no data pointer, or the requested range does
+ * not fit inside it.
+ */
+static int
+lua_util_crc32(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_lua_text *t = NULL;
+	int64_t start = 1, len = -1;
+
+	t = lua_check_text_or_string(L, 1);
+
+	if (t == NULL || t->start == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	if (lua_isnumber(L, 2)) {
+		start = lua_tointeger(L, 2);
+	}
+	if (lua_isnumber(L, 3)) {
+		len = lua_tointeger(L, 3);
+	}
+
+	/* start == t->len + 1 is accepted and denotes an empty slice at the end */
+	if (start < 1 || (start - 1) > t->len) {
+		return luaL_error(L, "invalid start offset %d (input len %d)",
+						  (int) start, (int) t->len);
+	}
+
+	if (len == -1) {
+		/* -1 is the "not given" sentinel: take everything up to the end */
+		len = t->len - (start - 1);
+	}
+
+	if (len < 0 || len > (t->len - (start - 1))) {
+		return luaL_error(L, "invalid length");
+	}
+
+	/* Standard CRC-32 (poly 0xEDB88320), bit-exact with zlib/YARA hash.crc32 */
+	const unsigned char *p = (const unsigned char *) t->start + (start - 1);
+	gsize remain = (gsize) len;
+	uLong crc = crc32(0L, Z_NULL, 0);
+
+	/* zlib crc32() takes a uInt length, so feed the slice in chunks */
+	while (remain > 0) {
+		uInt chunk = remain > G_MAXUINT ? G_MAXUINT : (uInt) remain;
+		crc = crc32(crc, p, chunk);
+		p += chunk;
+		remain -= chunk;
+	}
+
+	/*
+	 * lua_Integer may be only 32 bits wide (it is ptrdiff_t on Lua 5.1 and
+	 * LuaJIT, hence 32-bit signed on 32-bit targets). Casting a CRC with the
+	 * top bit set to such a type yields a negative value and breaks numeric
+	 * comparisons against literals such as 0xCBF43926. Fall back to
+	 * lua_Number there; a double represents every uint32_t exactly.
+	 */
+	if (sizeof(lua_Integer) > sizeof(uint32_t)) {
+		lua_pushinteger(L, (lua_Integer) (uint32_t) crc);
+	}
+	else {
+		lua_pushnumber(L, (lua_Number) (uint32_t) crc);
+	}
+
+	return 1;
+}
+
+
static int
lua_util_is_utf_spoofed(lua_State *L)
{
--- /dev/null
+-- CRC-32 tests: rspamd_text:crc32, rspamd_util.crc32 and
+-- rspamd_cryptobox_hash.create_specific("crc32").
+--
+-- Golden values are the standard CRC-32 (zlib crc32 / YARA hash.crc32,
+-- polynomial 0xEDB88320). The "real attachment buffer" values were produced
+-- with Python's zlib.crc32 (identical to YARA hash.crc32) over the exact same
+-- bytes constructed below.
+
+context("CRC-32", function()
+  local rspamd_text = require "rspamd_text"
+  local rspamd_util = require "rspamd_util"
+  local hash = require "rspamd_cryptobox_hash"
+
+  -- Shorthand: wrap a Lua string into an rspamd_text object.
+  local function T(s)
+    return rspamd_text.fromstring(s)
+  end
+
+  -- A deterministic, attachment-like binary buffer:
+  -- 15-byte PDF-ish header + 8 copies of all 256 byte values + footer.
+  local function sample_buffer()
+    local bytes = {}
+    for i = 0, 255 do
+      bytes[#bytes + 1] = string.char(i)
+    end
+    local block = string.rep(table.concat(bytes), 8) -- 2048 bytes
+    return "%PDF-1.7\n%\226\227\207\211\n" .. block .. "\nendstream endobj\n"
+  end
+
+  test("text:crc32 golden values", function()
+    assert_equal(T(""):crc32(), 0x00000000)
+    assert_equal(T("123456789"):crc32(), 0xCBF43926)
+  end)
+
+  test("util.crc32 golden values (string and text input)", function()
+    assert_equal(rspamd_util.crc32(""), 0x00000000)
+    assert_equal(rspamd_util.crc32("123456789"), 0xCBF43926)
+    assert_equal(rspamd_util.crc32(T("123456789")), 0xCBF43926)
+  end)
+
+  test("create_specific('crc32') == zlib/YARA crc32", function()
+    -- via initial data
+    assert_equal(hash.create_specific("crc32", "123456789"):hex(), "cbf43926")
+    -- empty input
+    assert_equal(hash.create_specific("crc32"):hex(), "00000000")
+  end)
+
+  test("create_specific('crc32') streaming matches one-shot", function()
+    local h = hash.create_specific("crc32")
+    h:update("123")
+    h:update("456")
+    h:update("789")
+    assert_equal(h:hex(), "cbf43926")
+    -- hex form must match the integer form of the text method
+    assert_equal(tonumber(h:hex(), 16), T("123456789"):crc32())
+  end)
+
+  test("create_specific('crc32'):reset re-initialises the state", function()
+    local h = hash.create_specific("crc32", "123456789")
+    assert_equal(h:hex(), "cbf43926")
+    h:reset()
+    h:update("123456789")
+    assert_equal(h:hex(), "cbf43926")
+  end)
+
+  test("text:crc32 1-based start/len slicing", function()
+    -- "234" is the 3-byte slice of "123456789" starting at position 2
+    assert_equal(T("123456789"):crc32(2, 3), 0x0D717969)
+    -- self-consistency: slice crc == crc of the same bytes on their own
+    assert_equal(T("123456789"):crc32(2, 3), T("234"):crc32())
+    -- default len runs to the end
+    assert_equal(T("123456789"):crc32(1), 0xCBF43926)
+    assert_equal(T("123456789"):crc32(1, 9), 0xCBF43926)
+  end)
+
+  test("util.crc32 1-based start/len slicing matches text method", function()
+    local s = "123456789"
+    assert_equal(rspamd_util.crc32(s, 2, 3), T(s):crc32(2, 3))
+    assert_equal(rspamd_util.crc32(s, 4), T(s):crc32(4))
+  end)
+
+  test("real attachment buffer cross-checked against zlib/YARA crc32", function()
+    local buf = sample_buffer()
+    assert_equal(#buf, 2081)
+    local t = T(buf)
+    -- whole buffer
+    assert_equal(t:crc32(), 0xD0A1329A)
+    assert_equal(rspamd_util.crc32(buf), 0xD0A1329A)
+    assert_equal(tonumber(hash.create_specific("crc32", t):hex(), 16), 0xD0A1329A)
+    -- a 40-byte interior slice (Python buf[10:50] == 1-based start=11, len=40)
+    assert_equal(t:crc32(11, 40), 0x2F95E096)
+    assert_equal(rspamd_util.crc32(buf, 11, 40), 0x2F95E096)
+  end)
+
+  test("text:crc32 rejects out-of-range arguments", function()
+    local t = T("123456789")
+    assert_false(pcall(function() t:crc32(0) end)) -- start < 1
+    assert_false(pcall(function() t:crc32(11) end)) -- start past end+1
+    assert_false(pcall(function() t:crc32(1, 100) end)) -- len too large
+    assert_false(pcall(function() t:crc32(5, 6) end)) -- slice runs past the end
+    assert_false(pcall(function() t:crc32(1, -2) end)) -- negative len
+  end)
+
+  test("util.crc32 rejects out-of-range arguments", function()
+    local s = "123456789"
+    assert_false(pcall(function() rspamd_util.crc32(s, 0) end)) -- start < 1
+    assert_false(pcall(function() rspamd_util.crc32(s, 11) end)) -- start past end+1
+    assert_false(pcall(function() rspamd_util.crc32(s, 1, 100) end)) -- len too large
+    assert_false(pcall(function() rspamd_util.crc32(s, 5, 6) end)) -- slice runs past the end
+    assert_false(pcall(function() rspamd_util.crc32(s, 1, -2) end)) -- negative len
+  end)
+
+  test("zero-length slices yield the empty-input crc", function()
+    local s = "123456789"
+    -- explicit len == 0
+    assert_equal(T(s):crc32(1, 0), 0x00000000)
+    assert_equal(rspamd_util.crc32(s, 1, 0), 0x00000000)
+    -- start == #s + 1 is the last valid offset and selects an empty slice
+    assert_equal(T(s):crc32(10), 0x00000000)
+    assert_equal(rspamd_util.crc32(s, 10), 0x00000000)
+  end)
+end)