git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Add PCRE2 complexity checks before JIT compilation
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 15 Mar 2026 12:26:17 +0000 (12:26 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 15 Mar 2026 12:26:17 +0000 (12:26 +0000)
Check compiled pattern size, frame size, and capture count
before calling pcre2_jit_compile to avoid crashes on
pathological patterns. Also set map->map pointer consistently
in lua_config_add_map for all map types.

src/libutil/regexp.c
src/lua/lua_map.c

index 0646285aebdf84c444ea1955c9f4c616f3749780..86a6b1688cefe22315faf6e9a1d997a82276498d 100644 (file)
@@ -191,8 +191,51 @@ rspamd_regexp_post_process(rspamd_regexp_t *r)
        PCRE2_UCHAR errstr[128];
        int errcode;
 
+       /*
+        * Check compiled pattern complexity before JIT to avoid crashes
+        * in pcre2_jit_compile on pathological patterns (e.g., huge Unicode
+        * character classes, deeply nested alternations).
+        * PCRE2_INFO_SIZE returns the size of the compiled pattern — patterns
+        * above ~64KB are risky for JIT. PCRE2_INFO_FRAMESIZE returns the
+        * backtracking frame size — very large frames indicate complexity.
+        */
+#define RSPAMD_RE_MAX_PCRE2_SIZE (1U << 16) /* 64 KB compiled size */
+#define RSPAMD_RE_MAX_FRAMESIZE (1U << 14)  /* 16 KB frame */
+#define RSPAMD_RE_MAX_CAPTURE_COUNT 128
+
        if (can_jit) {
-               if ((errcode = pcre2_jit_compile(r->re, jit_flags)) < 0) {
+               gboolean jit_safe = TRUE;
+               gsize compiled_sz = 0;
+               gsize frame_sz = 0;
+               uint32_t capture_cnt = 0;
+
+               if (pcre2_pattern_info(r->re, PCRE2_INFO_SIZE, &compiled_sz) == 0 &&
+                       compiled_sz > RSPAMD_RE_MAX_PCRE2_SIZE) {
+                       msg_info("pattern too large for JIT (%z bytes): \"%s\"",
+                                        compiled_sz, r->pattern);
+                       jit_safe = FALSE;
+               }
+
+               if (jit_safe &&
+                       pcre2_pattern_info(r->re, PCRE2_INFO_FRAMESIZE, &frame_sz) == 0 &&
+                       frame_sz > RSPAMD_RE_MAX_FRAMESIZE) {
+                       msg_info("pattern frame too large for JIT (%z bytes): \"%s\"",
+                                        frame_sz, r->pattern);
+                       jit_safe = FALSE;
+               }
+
+               if (jit_safe &&
+                       pcre2_pattern_info(r->re, PCRE2_INFO_CAPTURECOUNT, &capture_cnt) == 0 &&
+                       capture_cnt > RSPAMD_RE_MAX_CAPTURE_COUNT) {
+                       msg_info("pattern has too many captures for JIT (%ud): \"%s\"",
+                                        capture_cnt, r->pattern);
+                       jit_safe = FALSE;
+               }
+
+               if (!jit_safe) {
+                       r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+               }
+               else if ((errcode = pcre2_jit_compile(r->re, jit_flags)) < 0) {
                        pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr));
                        msg_err("jit compilation is not supported: %s; pattern: \"%s\"", errstr, r->pattern);
                        r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
@@ -213,7 +256,29 @@ rspamd_regexp_post_process(rspamd_regexp_t *r)
        }
 
        if (r->raw_re && r->re != r->raw_re && !(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
-               if ((errcode = pcre2_jit_compile(r->raw_re, jit_flags)) < 0) {
+               gboolean raw_jit_safe = TRUE;
+               gsize compiled_sz = 0;
+               gsize frame_sz = 0;
+
+               if (pcre2_pattern_info(r->raw_re, PCRE2_INFO_SIZE, &compiled_sz) == 0 &&
+                       compiled_sz > RSPAMD_RE_MAX_PCRE2_SIZE) {
+                       msg_info("raw pattern too large for JIT (%z bytes): \"%s\"",
+                                        compiled_sz, r->pattern);
+                       raw_jit_safe = FALSE;
+               }
+
+               if (raw_jit_safe &&
+                       pcre2_pattern_info(r->raw_re, PCRE2_INFO_FRAMESIZE, &frame_sz) == 0 &&
+                       frame_sz > RSPAMD_RE_MAX_FRAMESIZE) {
+                       msg_info("raw pattern frame too large for JIT (%z bytes): \"%s\"",
+                                        frame_sz, r->pattern);
+                       raw_jit_safe = FALSE;
+               }
+
+               if (!raw_jit_safe) {
+                       r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+               }
+               else if ((errcode = pcre2_jit_compile(r->raw_re, jit_flags)) < 0) {
                        pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr));
                        msg_debug("jit compilation is not supported for raw regexp: %s; pattern: \"%s\"", errstr, r->pattern);
                        r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
@@ -228,6 +293,9 @@ rspamd_regexp_post_process(rspamd_regexp_t *r)
                        }
                }
        }
+#undef RSPAMD_RE_MAX_PCRE2_SIZE
+#undef RSPAMD_RE_MAX_FRAMESIZE
+#undef RSPAMD_RE_MAX_CAPTURE_COUNT
 #endif
 
 #else
index fa375cf63a7059bcacd2f339021104cd459a752b..075e2b08f8e2e3ceac9d0e6220f7fea41a48cafb 100644 (file)
@@ -747,6 +747,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "set") == 0) {
@@ -765,6 +766,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "map") == 0 || strcmp(type, "hash") == 0) {
@@ -783,6 +785,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "radix") == 0) {
@@ -801,6 +804,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "regexp") == 0) {
@@ -819,6 +823,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "regexp_multi") == 0) {
@@ -837,6 +842,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "glob") == 0) {
@@ -855,6 +861,7 @@ int lua_config_add_map(lua_State *L)
 
                                return 1;
                        }
+                       map->map = m;
                        m->lua_map = map;
                }
                else if (strcmp(type, "glob_multi") == 0) {