#include "khash.h"
#include "fstring.h"
#include "libutil/cxx/utf8_util.h"
+#include "libutil/multipattern.h" /* for RSPAMD_MULTIPATTERN_MAX_REENTRANCY */
#ifdef __cplusplus
extern "C" {
/**
* How deep to follow URLs nested inside the query of an already query-extracted
* URL (a properly escaped wrapper carries one target per encoding layer).
- */
-#define RSPAMD_URL_QUERY_MAX_NESTING 5
+ *
+ * Each level re-enters the URL multipattern scan while the enclosing scan is
+ * still on the stack. The peak number of simultaneously-held scratch contexts
+ * on the deepest chain is therefore this depth plus two: one for the enclosing
+ * text/subject scan, and one for the per-URL TLD lookup that rspamd_url_parse
+ * runs on the freshly extracted leaf URL. Keep that within the multipattern
+ * scratch budget (RSPAMD_MULTIPATTERN_MAX_REENTRANCY) so normal nesting stays
+ * on the fast static-scratch path.
+ */
+#define RSPAMD_URL_QUERY_MAX_NESTING (RSPAMD_MULTIPATTERN_MAX_REENTRANCY - 2)
/**
* Find URLs embedded in the query parameters of `url`. Unlike
#include "libutil/regexp.h"
#include <stdalign.h>
-#define MAX_SCRATCH 4
+/*
+ * Depth of the per-multipattern scratch stack. A hyperscan scratch context is
+ * ~2.5-4 KiB, so a stack of RSPAMD_MULTIPATTERN_MAX_REENTRANCY costs only a few
+ * tens of KiB per multipattern while letting lookups be safely re-entered from
+ * within a match callback (see RSPAMD_MULTIPATTERN_MAX_REENTRANCY).
+ */
+#define MAX_SCRATCH RSPAMD_MULTIPATTERN_MAX_REENTRANCY
+
+/* scratch_used is an unsigned int bitmask, so the stack cannot exceed its width */
+G_STATIC_ASSERT(MAX_SCRATCH <= sizeof(unsigned int) * 8);
/*
* Threshold for "small" multipatterns that are compiled in memory
GArray *hs_pats;
GArray *hs_ids;
GArray *hs_flags;
- unsigned int scratch_used;
+ unsigned int scratch_used; /* bitmask of busy scratch[] slots */
#endif
ac_trie_t *t;
GArray *pats;
/* Use hyperscan if it's compiled and ready */
if (mp->state == RSPAMD_MP_STATE_COMPILED && mp->hs_db != NULL) {
hs_scratch_t *scr = NULL;
+ gboolean scr_temporary = FALSE;
unsigned int i;
for (i = 0; i < MAX_SCRATCH; i++) {
}
}
- g_assert(scr != NULL);
+ if (scr == NULL) {
+ /*
+ * The static scratch stack (MAX_SCRATCH deep) is exhausted by an
+ * unusually deep reentrant lookup - a lookup re-entered from within
+ * a match callback more than MAX_SCRATCH levels deep (e.g. a
+ * pathologically nested chain of query-embedded URLs). Callers are
+ * expected to keep their total nesting at or below MAX_SCRATCH
+ * (see RSPAMD_MULTIPATTERN_MAX_REENTRANCY), so this is a cold safety
+ * net: allocate a one-off scratch for this scan rather than abort
+ * the whole worker on attacker-controlled input.
+ */
+ int rc = hs_alloc_scratch(rspamd_hyperscan_get_database(mp->hs_db),
+ &scr);
+
+ if (rc != HS_SUCCESS || scr == NULL) {
+ msg_err("cannot allocate temporary hyperscan scratch "
+ "(error code %d) at reentrancy depth > %d; skipping lookup",
+ rc, (int) MAX_SCRATCH);
+ return 0;
+ }
+
+ scr_temporary = TRUE;
+ }
ret = hs_scan(rspamd_hyperscan_get_database(mp->hs_db), in, len, 0, scr,
rspamd_multipattern_hs_cb, &cbd);
- mp->scratch_used &= ~(1 << i);
+ if (scr_temporary) {
+ /* One-off scratch allocated above for this scan only: release it */
+ hs_free_scratch(scr);
+ }
+ else {
+ /*
+ * Clear this scan's bit in the busy mask. The mask is an unsigned int
+ * and MAX_SCRATCH is allowed to be as large as its bit width, so shift
+ * an unsigned 1: a signed `1 << i` is undefined for the top bit.
+ */
+ mp->scratch_used &= ~(1u << i);
+ }
if (ret == HS_SUCCESS) {
ret = 0;
struct ev_loop;
+/**
+ * Maximum reentrancy depth of rspamd_multipattern_lookup() on a single
+ * multipattern. Each multipattern keeps a small stack of hyperscan scratch
+ * contexts of this depth, so a lookup may be re-entered (from within a match
+ * callback) up to this many levels deep before the scratch pool is exhausted.
+ * Callers that recurse into the same multipattern from their callback (e.g. URL
+ * query unwrapping, which re-scans an embedded URL while the enclosing scan is
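+ * still on the stack) MUST keep their total nesting at or below this bound to
+ * stay on the preallocated scratch stack; beyond it the hyperscan lookup has
+ * to allocate a temporary scratch for each scan, or skips the scan if that
+ * allocation fails.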
+ */
+#define RSPAMD_MULTIPATTERN_MAX_REENTRANCY 10
+
enum rspamd_multipattern_flags {
RSPAMD_MULTIPATTERN_DEFAULT = 0,
RSPAMD_MULTIPATTERN_ICASE = (1 << 0),
--- /dev/null
+*** Settings ***
+Suite Setup Rspamd Setup
+Suite Teardown Rspamd Teardown
+Library ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/url_query_nesting.conf
+${MESSAGE} ${RSPAMD_TESTDIR}/messages/url_query_nesting.eml
+${RSPAMD_SCOPE} Suite
+${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+
+*** Test Cases ***
+DEEPLY NESTED QUERY URLS DO NOT CRASH THE WORKER
+ [Documentation] A URL whose query embeds another URL, repeated many times,
+ ... makes the URL multipattern scan re-enter itself once per
+ ... nesting level. The per-multipattern hyperscan scratch stack
+ ... must absorb that reentrancy (bounded by
+ ... RSPAMD_URL_QUERY_MAX_NESTING) instead of aborting the worker
+ ... on a "scr != NULL" assertion. Regression for the scratch-pool
+ ... exhaustion crash in rspamd_multipattern_lookup.
+ Scan File ${MESSAGE}
+ # The scan completing at all proves the worker survived (a crash would make
+ # Scan File fail). The outermost URL is always extracted; deeper hops are
+ # followed up to the nesting cap.
+ Expect URL h0.example.org
+ Expect URL h1.example.org
--- /dev/null
+options = {
+ filters = ["regexp"]
+ url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid";
+ lua_path = "{= env.INSTALLROOT =}/share/rspamd/lib/?.lua";
+ dns {
+ retransmits = 2;
+ }
+}
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log";
+}
+metric = {
+ name = "default",
+ actions = {
+ reject = 100500,
+ }
+ unknown_weight = 1
+}
+
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ keypair {
+ pubkey = "{= env.KEY_PUB1 =}";
+ privkey = "{= env.KEY_PVT1 =}";
+ }
+ task_timeout = 10s;
+}
+
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl"
+}
+
+modules {
+ path = "{= env.TESTDIR =}/../../src/plugins/lua/"
+}
+lua = "{= env.INSTALLROOT =}/share/rspamd/rules/rspamd.lua"
--- /dev/null
+From: attacker@example.org
+To: victim@example.com
+Subject: nested query urls regression
+Content-Type: text/plain
+Date: Mon, 01 Jun 2026 00:00:00 +0000
+Message-ID: <url-query-nesting-dos@example.org>
+
+Please click http://h0.example.org/?u=http://h1.example.org/?u=http://h2.example.org/?u=http://h3.example.org/?u=http://h4.example.org/?u=http://h5.example.org/?u=http://h6.example.org/?u=http://h7.example.org/?u=http://h8.example.org/?u=http://h9.example.org/?u=http://h10.example.org/?u=http://h11.example.org/?u=http://h12.example.org/ now.