From: Vsevolod Stakhov Date: Thu, 17 Jul 2025 08:16:50 +0000 (+0100) Subject: [Project] Support more common html attributes X-Git-Tag: 3.13.0~43^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4d39aa998cdd7f595925d8f0a81d6bee25cc091a;p=thirdparty%2Frspamd.git [Project] Support more common html attributes --- diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 5597b7eb57..374fb349c9 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -39,6 +39,7 @@ #include "contrib/frozen/include/frozen/string.h" #include "contrib/fmt/include/fmt/core.h" +#include #include namespace rspamd::html { @@ -47,23 +48,88 @@ static const unsigned int max_tags = 8192; /* Ignore tags if this maximum is rea static const html_tags_storage html_tags_defs; -auto html_components_map = frozen::make_unordered_map( +auto html_components_map = frozen::make_unordered_map( { - {"name", html_component_type::RSPAMD_HTML_COMPONENT_NAME}, - {"href", html_component_type::RSPAMD_HTML_COMPONENT_HREF}, - {"src", html_component_type::RSPAMD_HTML_COMPONENT_HREF}, - {"action", html_component_type::RSPAMD_HTML_COMPONENT_HREF}, - {"color", html_component_type::RSPAMD_HTML_COMPONENT_COLOR}, - {"bgcolor", html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR}, - {"style", html_component_type::RSPAMD_HTML_COMPONENT_STYLE}, - {"class", html_component_type::RSPAMD_HTML_COMPONENT_CLASS}, - {"width", html_component_type::RSPAMD_HTML_COMPONENT_WIDTH}, - {"height", html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT}, - {"size", html_component_type::RSPAMD_HTML_COMPONENT_SIZE}, - {"rel", html_component_type::RSPAMD_HTML_COMPONENT_REL}, - {"alt", html_component_type::RSPAMD_HTML_COMPONENT_ALT}, - {"id", html_component_type::RSPAMD_HTML_COMPONENT_ID}, - {"hidden", html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN}, + {"name", html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME}, + {"href", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF}, + {"src", html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC}, + {"action", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF}, + {"color", html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR}, + {"bgcolor", html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR}, + {"style", html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE}, + {"class", html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS}, + {"width", html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH}, + {"height", html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT}, + {"size", html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE}, + {"rel", html_component_enum_type::RSPAMD_HTML_COMPONENT_REL}, + {"alt", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT}, + {"id", html_component_enum_type::RSPAMD_HTML_COMPONENT_ID}, + {"hidden", html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN}, + // Typography + {"font-family", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY}, + {"font-size", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE}, + {"font-weight", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT}, + {"font-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE}, + {"text-align", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN}, + {"text-decoration", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION}, + {"line-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT}, + // Layout & positioning + {"margin", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN}, + {"margin-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP}, + {"margin-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM}, + {"margin-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT}, + {"margin-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT}, + {"padding", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING}, + {"padding-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP}, + {"padding-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM}, + {"padding-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT}, + {"padding-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT}, + {"border", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER}, + {"border-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR}, + {"border-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH}, + {"border-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE}, + // Display & visibility + {"display", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY}, + {"visibility", html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY}, + {"opacity", html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY}, + // Dimensions + {"min-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH}, + {"max-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH}, + {"min-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT}, + {"max-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT}, + // Table attributes + {"cellpadding", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING}, + {"cellspacing", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING}, + {"valign", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN}, + {"align", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN}, + // Form attributes + {"type", html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE}, + {"value", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE}, + {"placeholder", html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER}, + {"disabled", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED}, + {"readonly", html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY}, + {"checked", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED}, + {"selected", html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED}, + // Link & media + {"target", html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET}, + {"title", html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE}, + // Meta & document + {"charset", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET}, + {"content", html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT}, + {"http-equiv", html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV}, + // Accessibility + {"role", html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE}, + {"tabindex", html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX}, + // Background + {"background", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND}, + {"background-image", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE}, + {"background-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR}, + {"background-repeat", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT}, + {"background-position", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION}, + // Email-specific tracking + {"data-track", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK}, + {"data-id", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID}, + {"data-url", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL}, }); #define msg_debug_html(...) rspamd_conditional_debug_fast(NULL, NULL, \ @@ -205,32 +271,153 @@ auto html_component_from_string(std::string_view name, std::string_view value) - if (known_component_it != html_components_map.end()) { switch (known_component_it->second) { - case html_component_type::RSPAMD_HTML_COMPONENT_NAME: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME: return html_component_name{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_HREF: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF: return html_component_href{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_COLOR: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR: return html_component_color{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR: return html_component_bgcolor{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_STYLE: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE: return html_component_style{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_CLASS: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS: return html_component_class{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_WIDTH: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH: return html_component_width{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT: return html_component_height{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_SIZE: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE: return html_component_size{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_REL: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_REL: return html_component_rel{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_ALT: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT: return html_component_alt{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_ID: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_ID: return html_component_id{value}; - case html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN: + case html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN: return html_component_hidden{}; + // Typography + case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY: + return html_component_font_family{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE: + return html_component_font_size{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT: + return html_component_font_weight{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE: + return html_component_font_style{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN: + return html_component_text_align{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION: + return html_component_text_decoration{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT: + return html_component_line_height{value}; + // Layout + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN: + return html_component_margin{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP: + return html_component_margin_top{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM: + return html_component_margin_bottom{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT: + return html_component_margin_left{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT: + return html_component_margin_right{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING: + return html_component_padding{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP: + return html_component_padding_top{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM: + return html_component_padding_bottom{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT: + return html_component_padding_left{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT: + return html_component_padding_right{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER: + return html_component_border{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR: + return html_component_border_color{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH: + return html_component_border_width{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE: + return html_component_border_style{value}; + // Display + case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY: + return html_component_display{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY: + return html_component_visibility{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY: + return html_component_opacity{value}; + // Dimensions + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH: + return html_component_min_width{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH: + return html_component_max_width{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT: + return html_component_min_height{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT: + return html_component_max_height{value}; + // Table + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING: + return html_component_cellpadding{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING: + return html_component_cellspacing{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN: + return html_component_valign{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN: + return html_component_align{value}; + // Form + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE: + return html_component_type{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE: + return html_component_value{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER: + return html_component_placeholder{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED: + return html_component_disabled{}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY: + return html_component_readonly{}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED: + return html_component_checked{}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED: + return html_component_selected{}; + // Link & media + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET: + return html_component_target{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE: + return html_component_title{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC: + return html_component_src{value}; + // Meta + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET: + return html_component_charset{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT: + return html_component_content{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV: + return html_component_http_equiv{value}; + // Accessibility + case html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE: + return html_component_role{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX: + return html_component_tabindex{value}; + // Background + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND: + return html_component_background{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE: + return html_component_background_image{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR: + return html_component_background_color{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT: + return html_component_background_repeat{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION: + return html_component_background_position{value}; + // Email tracking + case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK: + return html_component_data_track{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID: + return html_component_data_id{value}; + case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL: + return html_component_data_url{value}; default: return html_component_unknown{name, value}; } @@ -240,6 +427,424 @@ auto html_component_from_string(std::string_view name, std::string_view value) - } } +using component_extractor_func = std::function(const html_tag *)>; +static const auto component_extractors = frozen::make_unordered_map( + { + // Basic components + {"name", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"href", [](const html_tag *tag) { return tag->find_href(); }}, + {"src", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"class", [](const html_tag *tag) { return tag->find_class(); }}, + {"id", [](const html_tag *tag) { return tag->find_id(); }}, + {"style", [](const html_tag *tag) { return tag->find_style(); }}, + {"alt", [](const html_tag *tag) { return tag->find_alt(); }}, + {"rel", [](const html_tag *tag) { return tag->find_rel(); }}, + {"color", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"bgcolor", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Numeric components (return string representation) + {"width", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"height", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"size", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + + // Boolean components + {"hidden", [](const html_tag *tag) -> std::optional { + return tag->is_hidden() ? std::optional{"true"} : std::nullopt; + }}, + + // Typography components + {"font-family", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"font-size", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"font-weight", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"font-style", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"text-align", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"text-decoration", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"line-height", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + + // Layout components + {"margin", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"margin-top", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"margin-bottom", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"margin-left", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"margin-right", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"padding", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"padding-top", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"padding-bottom", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"padding-left", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"padding-right", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"border", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"border-color", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"border-width", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"border-style", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Display components + {"display", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"visibility", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"opacity", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + + // Additional dimensions + {"min-width", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"max-width", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"min-height", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"max-height", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + + // Table components + {"cellpadding", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"cellspacing", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + {"valign", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"align", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Form components + {"type", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"value", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"placeholder", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"disabled", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->is_present() ? std::optional{"true"} : std::nullopt; + } + return std::nullopt; + }}, + {"readonly", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->is_present() ? std::optional{"true"} : std::nullopt; + } + return std::nullopt; + }}, + {"checked", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->is_present() ? std::optional{"true"} : std::nullopt; + } + return std::nullopt; + }}, + {"selected", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->is_present() ? std::optional{"true"} : std::nullopt; + } + return std::nullopt; + }}, + + // Link & media components + {"target", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"title", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Meta components + {"charset", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"content", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"http-equiv", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Accessibility components + {"role", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"tabindex", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->get_string_value(); + } + return std::nullopt; + }}, + + // Background components + {"background", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"background-image", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"background-color", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"background-repeat", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"background-position", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + + // Email tracking components + {"data-track", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"data-id", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + {"data-url", [](const html_tag *tag) -> std::optional { + if (auto comp = tag->find_component()) { + return comp.value()->value; + } + return std::nullopt; + }}, + }); + +auto html_tag::find_component_by_name(std::string_view attr_name) const -> std::optional +{ + auto it = component_extractors.find(attr_name); + if (it != component_extractors.end()) { + return it->second(this); + } + + // Fallback to unknown components + return find_unknown_component(attr_name); +} + enum tag_parser_state { parse_start = 0, parse_name, @@ -839,57 +1444,66 @@ html_process_img_tag(rspamd_mempool_t *pool, img = rspamd_mempool_alloc0_type(pool, struct html_image); img->tag = tag; - // Process HREF component - if (auto href_value = tag->find_href()) { - if (href_value->size() > 0) { - rspamd_ftok_t fstr; - fstr.begin = href_value->data(); - fstr.len = href_value->size(); - img->src = rspamd_mempool_ftokdup(pool, &fstr); + // Process SRC component (preferred for img tags) or HREF component (fallback) + std::optional href_value; + + // Try SRC first (standard for img tags) + if (auto src_comp = tag->find_component()) { + href_value = src_comp.value()->value; + } + // Fallback to HREF (for backward compatibility or non-standard usage) + else if (auto href_comp = tag->find_href()) { + href_value = href_comp; + } + + if (href_value && href_value->size() > 0) { + rspamd_ftok_t fstr; + fstr.begin = href_value->data(); + fstr.len = href_value->size(); + img->src = rspamd_mempool_ftokdup(pool, &fstr); - if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(), - "cid:", sizeof("cid:") - 1) == 0) { - /* We have an embedded image */ - img->src += sizeof("cid:") - 1; - img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED; + if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(), + "cid:", sizeof("cid:") - 1) == 0) { + /* We have an embedded image */ + img->src += sizeof("cid:") - 1; + img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED; + } + else { + if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(), + "data:", sizeof("data:") - 1) == 0) { + /* We have an embedded image in HTML tag */ + img->flags |= + (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA); + html_process_data_image(pool, img, *href_value); + hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS; } else { - if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(), - "data:", sizeof("data:") - 1) == 0) { - /* We have an embedded image in HTML tag */ - img->flags |= - (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA); - html_process_data_image(pool, img, *href_value); - hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS; - } - else { - img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL; - if (img->src) { - - std::string_view cpy{*href_value}; - auto maybe_url = html_process_url(pool, cpy); - - if (maybe_url) { - img->url = maybe_url.value(); - struct rspamd_url *existing; - - img->url->flags |= RSPAMD_URL_FLAG_IMAGE; - existing = rspamd_url_set_add_or_return(url_set, - img->url); - - if (existing && existing != img->url) { - /* - * We have some other URL that could be - * found, e.g. from another part. However, - * we still want to set an image flag on it - */ - existing->flags |= img->url->flags; - existing->count++; - } - else if (part_urls) { - /* New url */ - g_ptr_array_add(part_urls, img->url); - } + img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL; + if (img->src) { + + std::string_view cpy{*href_value}; + auto maybe_url = html_process_url(pool, cpy); + + if (maybe_url) { + img->url = maybe_url.value(); + struct rspamd_url *existing; + + img->url->flags |= RSPAMD_URL_FLAG_IMAGE; + existing = rspamd_url_set_add_or_return(url_set, + img->url); + + if (existing && existing != img->url) { + /* + * We have some other URL that could be + * found, e.g. from another part. However, + * we still want to set an image flag on it + */ + existing->flags |= img->url->flags; + existing->count++; + } + else if (part_urls) { + /* New url */ + g_ptr_array_add(part_urls, img->url); } } } diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx index a6b366a913..5948b91bf0 100644 --- a/src/libserver/html/html_tag.hxx +++ b/src/libserver/html/html_tag.hxx @@ -36,7 +36,7 @@ namespace rspamd::html { struct html_content; /* Forward declaration */ // Internal enum for mapping (not exposed in public API) -enum class html_component_type : std::uint8_t { +enum class html_component_enum_type : std::uint8_t { RSPAMD_HTML_COMPONENT_NAME = 0, RSPAMD_HTML_COMPONENT_HREF, RSPAMD_HTML_COMPONENT_COLOR, @@ -50,6 +50,72 @@ enum class html_component_type : std::uint8_t { RSPAMD_HTML_COMPONENT_ALT, RSPAMD_HTML_COMPONENT_ID, RSPAMD_HTML_COMPONENT_HIDDEN, + // Typography + RSPAMD_HTML_COMPONENT_FONT_FAMILY, + RSPAMD_HTML_COMPONENT_FONT_SIZE, + RSPAMD_HTML_COMPONENT_FONT_WEIGHT, + RSPAMD_HTML_COMPONENT_FONT_STYLE, + RSPAMD_HTML_COMPONENT_TEXT_ALIGN, + RSPAMD_HTML_COMPONENT_TEXT_DECORATION, + RSPAMD_HTML_COMPONENT_LINE_HEIGHT, + // Layout & positioning + RSPAMD_HTML_COMPONENT_MARGIN, + RSPAMD_HTML_COMPONENT_MARGIN_TOP, + RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM, + RSPAMD_HTML_COMPONENT_MARGIN_LEFT, + RSPAMD_HTML_COMPONENT_MARGIN_RIGHT, + RSPAMD_HTML_COMPONENT_PADDING, + RSPAMD_HTML_COMPONENT_PADDING_TOP, + RSPAMD_HTML_COMPONENT_PADDING_BOTTOM, + RSPAMD_HTML_COMPONENT_PADDING_LEFT, + RSPAMD_HTML_COMPONENT_PADDING_RIGHT, + RSPAMD_HTML_COMPONENT_BORDER, + RSPAMD_HTML_COMPONENT_BORDER_COLOR, + RSPAMD_HTML_COMPONENT_BORDER_WIDTH, + RSPAMD_HTML_COMPONENT_BORDER_STYLE, + // Display & visibility + RSPAMD_HTML_COMPONENT_DISPLAY, + RSPAMD_HTML_COMPONENT_VISIBILITY, + RSPAMD_HTML_COMPONENT_OPACITY, + // Dimensions + RSPAMD_HTML_COMPONENT_MIN_WIDTH, + RSPAMD_HTML_COMPONENT_MAX_WIDTH, + RSPAMD_HTML_COMPONENT_MIN_HEIGHT, + RSPAMD_HTML_COMPONENT_MAX_HEIGHT, + // Table attributes + RSPAMD_HTML_COMPONENT_CELLPADDING, + RSPAMD_HTML_COMPONENT_CELLSPACING, + RSPAMD_HTML_COMPONENT_VALIGN, + RSPAMD_HTML_COMPONENT_ALIGN, + // Form attributes + RSPAMD_HTML_COMPONENT_TYPE, + RSPAMD_HTML_COMPONENT_VALUE, + RSPAMD_HTML_COMPONENT_PLACEHOLDER, + RSPAMD_HTML_COMPONENT_DISABLED, + RSPAMD_HTML_COMPONENT_READONLY, + RSPAMD_HTML_COMPONENT_CHECKED, + RSPAMD_HTML_COMPONENT_SELECTED, + // Link & media + RSPAMD_HTML_COMPONENT_TARGET, + RSPAMD_HTML_COMPONENT_TITLE, + RSPAMD_HTML_COMPONENT_SRC, + // Meta & document + RSPAMD_HTML_COMPONENT_CHARSET, + RSPAMD_HTML_COMPONENT_CONTENT, + RSPAMD_HTML_COMPONENT_HTTP_EQUIV, + // Accessibility + RSPAMD_HTML_COMPONENT_ROLE, + RSPAMD_HTML_COMPONENT_TABINDEX, + // Background + RSPAMD_HTML_COMPONENT_BACKGROUND, + RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE, + RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR, + RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT, + RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION, + // Email-specific tracking + RSPAMD_HTML_COMPONENT_DATA_TRACK, + RSPAMD_HTML_COMPONENT_DATA_ID, + RSPAMD_HTML_COMPONENT_DATA_URL, }; // Forward declarations for component types @@ -71,25 +137,704 @@ struct html_component_unknown; // Base interface for all components struct html_component_base { virtual ~html_component_base() = default; - virtual std::string_view get_string_value() const = 0; + virtual constexpr std::string_view get_string_value() const = 0; }; // String-based components struct html_component_name : html_component_base { std::string_view value; - explicit html_component_name(std::string_view v) + explicit constexpr html_component_name(std::string_view v) : value(v) { } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_href : html_component_base { + std::string_view value; + explicit constexpr html_component_href(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_style : html_component_base { + std::string_view value; + explicit constexpr html_component_style(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_class : html_component_base { + std::string_view value; + explicit constexpr html_component_class(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_rel : html_component_base { + std::string_view value; + explicit constexpr html_component_rel(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_alt : html_component_base { + std::string_view value; + explicit constexpr html_component_alt(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_id : html_component_base { + std::string_view value; + explicit constexpr html_component_id(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +// Color components (could be extended to parse actual colors) +struct html_component_color : html_component_base { + std::string_view value; + explicit constexpr html_component_color(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_bgcolor : html_component_base { + std::string_view value; + explicit constexpr html_component_bgcolor(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +// Numeric components +struct html_component_width : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_width(const std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + constexpr std::string_view get_string_value() const override + { + return raw_value; + } + constexpr std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_height : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_height(const std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + constexpr std::string_view get_string_value() const override + { + return raw_value; + } + constexpr std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_size : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_size(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + constexpr std::string_view get_string_value() const override + { + return raw_value; + } + constexpr std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +// Boolean/flag component +struct html_component_hidden : html_component_base { + bool present; + explicit constexpr html_component_hidden() + : present(true) + { + } + constexpr std::string_view get_string_value() const override + { + return present ? "true" : "false"; + } + constexpr bool is_present() const + { + return present; + } +}; + +// Unknown component with both name and value +struct html_component_unknown : html_component_base { + std::string_view name; + std::string_view value; + + constexpr html_component_unknown(std::string_view n, std::string_view v) + : name(n), value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } + constexpr std::string_view get_name() const + { + return name; + } +}; + +// Typography components +struct html_component_font_family : html_component_base { + std::string_view value; + explicit constexpr html_component_font_family(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_font_size : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_font_size(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + constexpr std::string_view get_string_value() const override + { + return raw_value; + } + constexpr std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_font_weight : html_component_base { + std::string_view value; + explicit constexpr html_component_font_weight(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_font_style : html_component_base { + std::string_view value; + explicit constexpr html_component_font_style(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_text_align : html_component_base { + std::string_view value; + explicit constexpr html_component_text_align(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_text_decoration : html_component_base { + std::string_view value; + explicit constexpr html_component_text_decoration(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_line_height : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_line_height(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +// Layout components (most are string-based for flexibility) +struct html_component_margin : html_component_base { + std::string_view value; + explicit constexpr html_component_margin(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_margin_top : html_component_base { + std::string_view value; + explicit constexpr html_component_margin_top(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_margin_bottom : html_component_base { + std::string_view value; + explicit constexpr html_component_margin_bottom(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_margin_left : html_component_base { + std::string_view value; + explicit constexpr html_component_margin_left(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_margin_right : html_component_base { + std::string_view value; + explicit constexpr html_component_margin_right(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_padding : html_component_base { + std::string_view value; + explicit constexpr html_component_padding(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_padding_top : html_component_base { + std::string_view value; + explicit constexpr html_component_padding_top(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_padding_bottom : html_component_base { + std::string_view value; + explicit constexpr html_component_padding_bottom(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_padding_left : html_component_base { + std::string_view value; + explicit constexpr html_component_padding_left(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_padding_right : html_component_base { + std::string_view value; + explicit constexpr html_component_padding_right(std::string_view v) + : value(v) + { + } + constexpr std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_border : html_component_base { + std::string_view value; + explicit html_component_border(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_border_color : html_component_base { + std::string_view value; + explicit html_component_border_color(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_border_width : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_border_width(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_border_style : html_component_base { + std::string_view value; + explicit html_component_border_style(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; + +// Display components +struct html_component_display : html_component_base { + std::string_view value; + explicit html_component_display(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_visibility : html_component_base { + std::string_view value; + explicit html_component_visibility(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; + +struct html_component_opacity : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_opacity(std::string_view v) + : raw_value(v) + { + char *endptr; + auto val = std::strtof(v.data(), &endptr); + if (endptr != v.data() && val >= 0.0f && val <= 1.0f) { + numeric_value = val; + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +// Additional dimension components +struct html_component_min_width : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_min_width(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_max_width : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_max_width(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_min_height : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_min_height(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_max_height : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_max_height(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + std::string_view get_string_value() const override { - return value; + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; } }; -struct html_component_href : html_component_base { +// Table components +struct html_component_cellpadding : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_cellpadding(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_cellspacing : html_component_base { + std::string_view raw_value; + std::optional numeric_value; + + explicit html_component_cellspacing(std::string_view v) + : raw_value(v) + { + unsigned long val; + if (rspamd_strtoul(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); + } + } + + std::string_view get_string_value() const override + { + return raw_value; + } + std::optional get_numeric_value() const + { + return numeric_value; + } +}; + +struct html_component_valign : html_component_base { std::string_view value; - explicit html_component_href(std::string_view v) + explicit html_component_valign(std::string_view v) : value(v) { } @@ -99,9 +844,9 @@ struct html_component_href : html_component_base { } }; -struct html_component_style : html_component_base { +struct html_component_align : html_component_base { std::string_view value; - explicit html_component_style(std::string_view v) + explicit html_component_align(std::string_view v) : value(v) { } @@ -111,9 +856,10 @@ struct html_component_style : html_component_base { } }; -struct html_component_class : html_component_base { +// Form components +struct html_component_type : html_component_base { std::string_view value; - explicit html_component_class(std::string_view v) + explicit html_component_type(std::string_view v) : value(v) { } @@ -123,9 +869,9 @@ struct html_component_class : html_component_base { } }; -struct html_component_rel : html_component_base { +struct html_component_value : html_component_base { std::string_view value; - explicit html_component_rel(std::string_view v) + explicit html_component_value(std::string_view v) : value(v) { } @@ -135,9 +881,9 @@ struct html_component_rel : html_component_base { } }; -struct html_component_alt : html_component_base { +struct html_component_placeholder : html_component_base { std::string_view value; - explicit html_component_alt(std::string_view v) + explicit html_component_placeholder(std::string_view v) : value(v) { } @@ -147,9 +893,75 @@ struct html_component_alt : html_component_base { } }; -struct html_component_id : html_component_base { +// Boolean form components +struct html_component_disabled : html_component_base { + bool present; + explicit constexpr html_component_disabled() + : present(true) + { + } + constexpr std::string_view get_string_value() const override + { + return present ? "true" : "false"; + } + constexpr bool is_present() const + { + return present; + } +}; + +struct html_component_readonly : html_component_base { + bool present; + explicit constexpr html_component_readonly() + : present(true) + { + } + constexpr std::string_view get_string_value() const override + { + return present ? "true" : "false"; + } + constexpr bool is_present() const + { + return present; + } +}; + +struct html_component_checked : html_component_base { + bool present; + explicit constexpr html_component_checked() + : present(true) + { + } + constexpr std::string_view get_string_value() const override + { + return present ? "true" : "false"; + } + constexpr bool is_present() const + { + return present; + } +}; + +struct html_component_selected : html_component_base { + bool present; + explicit constexpr html_component_selected() + : present(true) + { + } + constexpr std::string_view get_string_value() const override + { + return present ? "true" : "false"; + } + constexpr bool is_present() const + { + return present; + } +}; + +// Link & media components +struct html_component_target : html_component_base { std::string_view value; - explicit html_component_id(std::string_view v) + explicit html_component_target(std::string_view v) : value(v) { } @@ -159,10 +971,9 @@ struct html_component_id : html_component_base { } }; -// Color components (could be extended to parse actual colors) -struct html_component_color : html_component_base { +struct html_component_title : html_component_base { std::string_view value; - explicit html_component_color(std::string_view v) + explicit html_component_title(std::string_view v) : value(v) { } @@ -172,9 +983,9 @@ struct html_component_color : html_component_base { } }; -struct html_component_bgcolor : html_component_base { +struct html_component_src : html_component_base { std::string_view value; - explicit html_component_bgcolor(std::string_view v) + explicit html_component_src(std::string_view v) : value(v) { } @@ -184,40 +995,66 @@ struct html_component_bgcolor : html_component_base { } }; -// Numeric components -struct html_component_width : html_component_base { - std::string_view raw_value; - std::optional numeric_value; +// Meta components +struct html_component_charset : html_component_base { + std::string_view value; + explicit html_component_charset(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; - explicit html_component_width(const std::string_view v) - : raw_value(v) +struct html_component_content : html_component_base { + std::string_view value; + explicit html_component_content(std::string_view v) + : value(v) { - unsigned long val; - if (rspamd_strtoul(v.data(), v.size(), &val)) { - numeric_value = static_cast(val); - } } + std::string_view get_string_value() const override + { + return value; + } +}; +struct html_component_http_equiv : html_component_base { + std::string_view value; + explicit html_component_http_equiv(std::string_view v) + : value(v) + { + } std::string_view get_string_value() const override { - return raw_value; + return value; } - std::optional get_numeric_value() const +}; + +// Accessibility components +struct html_component_role : html_component_base { + std::string_view value; + explicit html_component_role(std::string_view v) + : value(v) { - return numeric_value; + } + std::string_view get_string_value() const override + { + return value; } }; -struct html_component_height : html_component_base { +struct html_component_tabindex : html_component_base { std::string_view raw_value; - std::optional numeric_value; + std::optional numeric_value; - explicit html_component_height(const std::string_view v) + explicit html_component_tabindex(std::string_view v) : raw_value(v) { - unsigned long val; - if (rspamd_strtoul(v.data(), v.size(), &val)) { - numeric_value = static_cast(val); + long val; + if (rspamd_strtol(v.data(), v.size(), &val)) { + numeric_value = static_cast(val); } } @@ -225,68 +1062,107 @@ struct html_component_height : html_component_base { { return raw_value; } - std::optional get_numeric_value() const + std::optional get_numeric_value() const { return numeric_value; } }; -struct html_component_size : html_component_base { - std::string_view raw_value; - std::optional numeric_value; - - explicit html_component_size(std::string_view v) - : raw_value(v) +// Background components +struct html_component_background : html_component_base { + std::string_view value; + explicit html_component_background(std::string_view v) + : value(v) { - unsigned long val; - if (rspamd_strtoul(v.data(), v.size(), &val)) { - numeric_value = static_cast(val); - } } + std::string_view get_string_value() const override + { + return value; + } +}; +struct html_component_background_image : html_component_base { + std::string_view value; + explicit html_component_background_image(std::string_view v) + : value(v) + { + } std::string_view get_string_value() const override { - return raw_value; + return value; } - std::optional get_numeric_value() const +}; + +struct html_component_background_color : html_component_base { + std::string_view value; + explicit html_component_background_color(std::string_view v) + : value(v) { - return numeric_value; + } + std::string_view get_string_value() const override + { + return value; } }; -// Boolean/flag component -struct html_component_hidden : html_component_base { - bool present; - explicit html_component_hidden() - : present(true) +struct html_component_background_repeat : html_component_base { + std::string_view value; + explicit html_component_background_repeat(std::string_view v) + : value(v) { } std::string_view get_string_value() const override { - return present ? "true" : "false"; + return value; + } +}; + +struct html_component_background_position : html_component_base { + std::string_view value; + explicit html_component_background_position(std::string_view v) + : value(v) + { } - bool is_present() const + std::string_view get_string_value() const override { - return present; + return value; } }; -// Unknown component with both name and value -struct html_component_unknown : html_component_base { - std::string_view name; +// Email tracking components +struct html_component_data_track : html_component_base { std::string_view value; + explicit html_component_data_track(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override + { + return value; + } +}; - html_component_unknown(std::string_view n, std::string_view v) - : name(n), value(v) +struct html_component_data_id : html_component_base { + std::string_view value; + explicit html_component_data_id(std::string_view v) + : value(v) { } std::string_view get_string_value() const override { return value; } - std::string_view get_name() const +}; + +struct html_component_data_url : html_component_base { + std::string_view value; + explicit html_component_data_url(std::string_view v) + : value(v) + { + } + std::string_view get_string_value() const override { - return name; + return value; } }; @@ -305,6 +1181,73 @@ using html_tag_component = std::variant< html_component_alt, html_component_id, html_component_hidden, + // Typography + html_component_font_family, + html_component_font_size, + html_component_font_weight, + html_component_font_style, + html_component_text_align, + html_component_text_decoration, + html_component_line_height, + // Layout + html_component_margin, + html_component_margin_top, + html_component_margin_bottom, + html_component_margin_left, + html_component_margin_right, + html_component_padding, + html_component_padding_top, + html_component_padding_bottom, + html_component_padding_left, + html_component_padding_right, + html_component_border, + html_component_border_color, + html_component_border_width, + html_component_border_style, + // Display + html_component_display, + html_component_visibility, + html_component_opacity, + // Dimensions + html_component_min_width, + html_component_max_width, + html_component_min_height, + html_component_max_height, + // Table + html_component_cellpadding, + html_component_cellspacing, + html_component_valign, + html_component_align, + // Form + html_component_type, + html_component_value, + html_component_placeholder, + html_component_disabled, + html_component_readonly, + html_component_checked, + html_component_selected, + // Link & media + html_component_target, + html_component_title, + html_component_src, + // Meta + html_component_charset, + html_component_content, + html_component_http_equiv, + // Accessibility + html_component_role, + html_component_tabindex, + // Background + html_component_background, + html_component_background_image, + html_component_background_color, + html_component_background_repeat, + html_component_background_position, + // Email tracking + html_component_data_track, + html_component_data_id, + html_component_data_url, + // Unknown html_component_unknown>; /** @@ -356,7 +1299,7 @@ struct html_tag { // Template method to find component by type template - auto find_component() const -> std::optional + constexpr auto find_component() const -> std::optional { for (const auto &comp: components) { if (std::holds_alternative(comp)) { @@ -367,7 +1310,7 @@ struct html_tag { } // Helper methods for common component access - auto find_href() const -> std::optional + constexpr auto find_href() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -375,7 +1318,7 @@ struct html_tag { return std::nullopt; } - auto find_class() const -> std::optional + constexpr auto find_class() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -383,7 +1326,7 @@ struct html_tag { return std::nullopt; } - auto find_id() const -> std::optional + constexpr auto find_id() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -391,7 +1334,7 @@ struct html_tag { return std::nullopt; } - auto find_width() const -> std::optional + constexpr auto find_width() const -> std::optional { if (auto comp = find_component()) { return comp.value()->get_numeric_value(); @@ -399,7 +1342,7 @@ struct html_tag { return std::nullopt; } - auto find_height() const -> std::optional + constexpr auto find_height() const -> std::optional { if (auto comp = find_component()) { return comp.value()->get_numeric_value(); @@ -407,7 +1350,7 @@ struct html_tag { return std::nullopt; } - auto find_style() const -> std::optional + constexpr auto find_style() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -415,7 +1358,7 @@ struct html_tag { return std::nullopt; } - auto find_alt() const -> std::optional + constexpr auto find_alt() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -423,7 +1366,7 @@ struct html_tag { return std::nullopt; } - auto find_rel() const -> std::optional + constexpr auto find_rel() const -> std::optional { if (auto comp = find_component()) { return comp.value()->value; @@ -431,12 +1374,12 @@ struct html_tag { return std::nullopt; } - auto is_hidden() const -> bool + constexpr auto is_hidden() const -> bool { return find_component().has_value(); } - auto find_unknown_component(std::string_view attr_name) const -> std::optional + constexpr auto find_unknown_component(std::string_view attr_name) const -> std::optional { for (const auto &comp: components) { if (std::holds_alternative(comp)) { @@ -449,7 +1392,7 @@ struct html_tag { return std::nullopt; } - auto get_unknown_components() const -> std::vector> + constexpr auto get_unknown_components() const -> std::vector> { std::vector> unknown_attrs; for (const auto &comp: components) { @@ -470,57 +1413,8 @@ struct html_tag { } } - // Find any component by attribute name (for Lua bindings and generic access) - auto find_component_by_name(std::string_view attr_name) const -> std::optional - { - // Check known component types first using their helper methods - if (attr_name == "href") return find_href(); - if (attr_name == "class") return find_class(); - if (attr_name == "id") return find_id(); - if (attr_name == "style") return find_style(); - if (attr_name == "alt") return find_alt(); - if (attr_name == "rel") return find_rel(); - if (attr_name == "hidden") return is_hidden() ? std::optional{"true"} : std::nullopt; - - // Handle numeric components that need string conversion - if (attr_name == "width") { - if (auto comp = find_component()) { - return comp.value()->get_string_value(); - } - } - if (attr_name == "height") { - if (auto comp = find_component()) { - return comp.value()->get_string_value(); - } - } - if (attr_name == "size") { - if (auto comp = find_component()) { - return comp.value()->get_string_value(); - } - } - - // Handle color components - if (attr_name == "color") { - if (auto comp = find_component()) { - return comp.value()->value; - } - } - if (attr_name == "bgcolor") { - if (auto comp = find_component()) { - return comp.value()->value; - } - } - - // Handle name component - if (attr_name == "name") { - if (auto comp = find_component()) { - return comp.value()->value; - } - } - - // Finally check unknown components - return find_unknown_component(attr_name); - } + // Find any component by attribute name + auto find_component_by_name(std::string_view attr_name) const -> std::optional; auto clear(void) -> void {