#include <unicode/usprep.h>
#include <unicode/ucnv.h>
+/*
+ * Following a query-embedded URL re-enters the URL multipattern scan; the
+ * deepest chain holds RSPAMD_URL_QUERY_MAX_NESTING scratch contexts plus two
+ * (the + 2 below): the enclosing scan and the leaf TLD lookup. Keep that on
+ * the matcher's fast static-scratch path (a deeper run still works via the
+ * graceful fallback in rspamd_multipattern_lookup, without a cached scratch).
+ */
+G_STATIC_ASSERT(RSPAMD_URL_QUERY_MAX_NESTING + 2 <= RSPAMD_MULTIPATTERN_MAX_REENTRANCY);
+
/* Lua URL filter consultation return values */
enum rspamd_url_lua_filter_result {
RSPAMD_URL_LUA_FILTER_ACCEPT = 0, /* Continue parsing normally */
* How deep to follow URLs nested inside the query of an already query-extracted
* URL (a properly escaped wrapper carries one target per encoding layer).
*
+ * This is a functional limit on how far redirect/wrapper chains are unwrapped.
* Each level re-enters the URL multipattern scan while the enclosing scan is
- * still on the stack. The peak number of simultaneously-held scratch contexts
- * on the deepest chain is therefore this depth plus two: one for the enclosing
- * text/subject scan, and one for the per-URL TLD lookup that rspamd_url_parse
- * runs on the freshly extracted leaf URL. Keep that within the multipattern
- * scratch budget (RSPAMD_MULTIPATTERN_MAX_REENTRANCY) so normal nesting stays
- * on the fast static-scratch path.
- */
-#define RSPAMD_URL_QUERY_MAX_NESTING (RSPAMD_MULTIPATTERN_MAX_REENTRANCY - 2)
+ * still on the stack, so the deepest chain holds this depth plus two scratch
+ * contexts (the enclosing text/subject scan, and the per-URL TLD lookup that
+ * rspamd_url_parse runs on the freshly extracted leaf). This depth plus two
+ * must therefore not exceed the multipattern scratch budget
+ * (RSPAMD_MULTIPATTERN_MAX_REENTRANCY); the static assert in url.c enforces
+ * that, and rspamd_multipattern_lookup() degrades gracefully if the budget is
+ * ever exceeded.
+ */
+#define RSPAMD_URL_QUERY_MAX_NESTING 5
/**
* Find URLs embedded in the query parameters of `url`. Unlike