* encoding, but HTML tags and line breaks will still be present.
* Multiline expressions will need to be used to match strings that are
* broken by line breaks.
+ *
+ * Note: parsed content is transfer-decoded but NOT charset-converted,
+ * so it may contain non-UTF-8 data. For that reason the regexp is
+ * always matched in raw mode here.
*/
if (MESSAGE_FIELD(task, text_parts)->len > 0) {
cnt = MESSAGE_FIELD(task, text_parts)->len;
if (text_part->parsed.len > 0) {
scvec[i] = (unsigned char *) text_part->parsed.begin;
lenvec[i] = text_part->parsed.len;
-
- if (!IS_TEXT_PART_UTF(text_part)) {
- raw = TRUE;
- }
}
else {
scvec[i] = (unsigned char *) "";
}
}
+ /* Always raw - parsed content is not charset-converted */
ret = rspamd_re_cache_process_regexp_data(rt, re,
- task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+ task, scvec, lenvec, cnt, TRUE, &processed_hyperscan);
msg_debug_re_task("checked sa rawbody regexp: %s -> %d",
rspamd_regexp_get_pattern(re), ret);
g_free(scvec);