struct html_component_href : html_component_base {
std::string_view value;
- std::size_t offset = 0;// offset in decoded HTML buffer
- std::size_t len = 0; // length of raw attribute value
+ std::size_t offset = 0;// offset in UTF-8 HTML buffer (utf_raw_content)
+ std::size_t len = 0; // length in UTF-8 HTML buffer (utf_raw_content)
explicit constexpr html_component_href(std::string_view v, std::size_t off = 0, std::size_t l = 0)
: value(v), offset(off), len(l)
{
struct html_component_src : html_component_base {
std::string_view value;
- std::size_t offset = 0;// offset in decoded HTML buffer
- std::size_t len = 0; // length of raw attribute value
+ std::size_t offset = 0;// offset in UTF-8 HTML buffer (utf_raw_content)
+ std::size_t len = 0; // length in UTF-8 HTML buffer (utf_raw_content)
explicit html_component_src(std::string_view v, std::size_t off = 0, std::size_t l = 0)
: value(v), offset(off), len(l)
{
const html_tag *tag; // Tag containing the attribute
std::string_view attr_name;// "href" or "src"
std::string absolute_url; // Absolute/canonicalized URL for Lua policy
- std::size_t offset; // Offset of attribute value in decoded HTML buffer
- std::size_t len; // Length of attribute value in decoded HTML buffer
+ std::size_t offset; // Offset of attribute value in UTF-8 HTML buffer (utf_raw_content)
+ std::size_t len; // Length of attribute value in UTF-8 HTML buffer (utf_raw_content)
int part_id; // MIME part ID (for multi-part messages)
};
/**
- * Patch to apply to the decoded HTML buffer
+ * Patch to apply to the UTF-8 HTML buffer
* Represents a single replacement operation
*/
struct rewrite_patch {
int part_id; // MIME part ID
- std::size_t offset; // Offset in decoded buffer
- std::size_t len; // Length to replace
+ std::size_t offset; // Offset in UTF-8 HTML buffer (utf_raw_content)
+ std::size_t len; // Length to replace in UTF-8 HTML buffer (utf_raw_content)
std::string replacement;// Replacement string
// For sorting patches by offset
/**
 * Validate a list of rewrite patches
 * NOTE(review): the list is taken by non-const reference, so this call may
 * modify it (presumably sorting it by offset) - confirm against the implementation
 * @param patches list of patches to validate
 * @return boolean verdict; presumably true when the list is usable - TODO confirm
 */
auto validate_patches(std::vector<rewrite_patch> &patches) -> bool;
/**
- * Apply patches to a decoded HTML buffer
- * @param original original decoded buffer
+ * Apply patches to a UTF-8 HTML buffer
+ * @param original original UTF-8 HTML buffer (utf_raw_content)
* @param patches sorted, non-overlapping patches
* @return rewritten buffer
*/
* @param hc HTML content
* @param func_ref Lua function reference from luaL_ref
* @param part_id MIME part ID
- * @param original_html Original HTML content (decoded)
+ * @param original_html Original HTML content (UTF-8, from utf_raw_content)
* @return Rewritten HTML or nullopt if no changes
*/
auto process_html_url_rewrite(struct rspamd_task *task,
continue;
}
+ /* Skip if no UTF-8 content available */
+ if (!text_part->utf_raw_content || text_part->utf_raw_content->len == 0) {
+ continue;
+ }
+
char *output_html = NULL;
gsize output_len = 0;
- /* Process URL rewriting using C wrapper */
+ /* Process URL rewriting using C wrapper on UTF-8 buffer */
int ret = rspamd_html_url_rewrite(
task,
L,
text_part->html,
func_ref,
text_part->mime_part->part_number,
- (const char *) text_part->parsed.begin,
- text_part->parsed.len,
+ (const char *) text_part->utf_raw_content->data,
+ text_part->utf_raw_content->len,
&output_html,
&output_len);