git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Support more common html attributes
author Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 17 Jul 2025 08:16:50 +0000 (09:16 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 17 Jul 2025 08:16:50 +0000 (09:16 +0100)
src/libserver/html/html.cxx
src/libserver/html/html_tag.hxx

index 5597b7eb5705dbaa84dd64287f9db1fdc0db5d4d..374fb349c9141b9beb193d53b3581612aeba66e7 100644 (file)
@@ -39,6 +39,7 @@
 #include "contrib/frozen/include/frozen/string.h"
 #include "contrib/fmt/include/fmt/core.h"
 
+#include <functional>
 #include <unicode/uversion.h>
 
 namespace rspamd::html {
@@ -47,23 +48,88 @@ static const unsigned int max_tags = 8192; /* Ignore tags if this maximum is rea
 
 static const html_tags_storage html_tags_defs;
 
-auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_type>(
+auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_enum_type>(
        {
-               {"name", html_component_type::RSPAMD_HTML_COMPONENT_NAME},
-               {"href", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
-               {"src", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
-               {"action", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
-               {"color", html_component_type::RSPAMD_HTML_COMPONENT_COLOR},
-               {"bgcolor", html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
-               {"style", html_component_type::RSPAMD_HTML_COMPONENT_STYLE},
-               {"class", html_component_type::RSPAMD_HTML_COMPONENT_CLASS},
-               {"width", html_component_type::RSPAMD_HTML_COMPONENT_WIDTH},
-               {"height", html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT},
-               {"size", html_component_type::RSPAMD_HTML_COMPONENT_SIZE},
-               {"rel", html_component_type::RSPAMD_HTML_COMPONENT_REL},
-               {"alt", html_component_type::RSPAMD_HTML_COMPONENT_ALT},
-               {"id", html_component_type::RSPAMD_HTML_COMPONENT_ID},
-               {"hidden", html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+               {"name", html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME},
+               {"href", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF},
+               {"src", html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC},
+               {"action", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF},
+               {"color", html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR},
+               {"bgcolor", html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
+               {"style", html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE},
+               {"class", html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS},
+               {"width", html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH},
+               {"height", html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT},
+               {"size", html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE},
+               {"rel", html_component_enum_type::RSPAMD_HTML_COMPONENT_REL},
+               {"alt", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT},
+               {"id", html_component_enum_type::RSPAMD_HTML_COMPONENT_ID},
+               {"hidden", html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+               // Typography
+               {"font-family", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY},
+               {"font-size", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE},
+               {"font-weight", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT},
+               {"font-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE},
+               {"text-align", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN},
+               {"text-decoration", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION},
+               {"line-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT},
+               // Layout & positioning
+               {"margin", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN},
+               {"margin-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP},
+               {"margin-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM},
+               {"margin-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT},
+               {"margin-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT},
+               {"padding", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING},
+               {"padding-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP},
+               {"padding-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM},
+               {"padding-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT},
+               {"padding-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT},
+               {"border", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER},
+               {"border-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR},
+               {"border-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH},
+               {"border-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE},
+               // Display & visibility
+               {"display", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY},
+               {"visibility", html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY},
+               {"opacity", html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY},
+               // Dimensions
+               {"min-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH},
+               {"max-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH},
+               {"min-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT},
+               {"max-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT},
+               // Table attributes
+               {"cellpadding", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING},
+               {"cellspacing", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING},
+               {"valign", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN},
+               {"align", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN},
+               // Form attributes
+               {"type", html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE},
+               {"value", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE},
+               {"placeholder", html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER},
+               {"disabled", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED},
+               {"readonly", html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY},
+               {"checked", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED},
+               {"selected", html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED},
+               // Link & media
+               {"target", html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET},
+               {"title", html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE},
+               // Meta & document
+               {"charset", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET},
+               {"content", html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT},
+               {"http-equiv", html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV},
+               // Accessibility
+               {"role", html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE},
+               {"tabindex", html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX},
+               // Background
+               {"background", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND},
+               {"background-image", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE},
+               {"background-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR},
+               {"background-repeat", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT},
+               {"background-position", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION},
+               // Email-specific tracking
+               {"data-track", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK},
+               {"data-id", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID},
+               {"data-url", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL},
        });
 
 #define msg_debug_html(...) rspamd_conditional_debug_fast(NULL, NULL,                                \
@@ -205,32 +271,153 @@ auto html_component_from_string(std::string_view name, std::string_view value) -
 
        if (known_component_it != html_components_map.end()) {
                switch (known_component_it->second) {
-               case html_component_type::RSPAMD_HTML_COMPONENT_NAME:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME:
                        return html_component_name{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_HREF:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF:
                        return html_component_href{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_COLOR:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR:
                        return html_component_color{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR:
                        return html_component_bgcolor{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_STYLE:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE:
                        return html_component_style{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_CLASS:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS:
                        return html_component_class{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_WIDTH:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH:
                        return html_component_width{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT:
                        return html_component_height{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_SIZE:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE:
                        return html_component_size{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_REL:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_REL:
                        return html_component_rel{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_ALT:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT:
                        return html_component_alt{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_ID:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_ID:
                        return html_component_id{value};
-               case html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN:
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN:
                        return html_component_hidden{};
+               // Typography
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY:
+                       return html_component_font_family{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE:
+                       return html_component_font_size{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT:
+                       return html_component_font_weight{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE:
+                       return html_component_font_style{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN:
+                       return html_component_text_align{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION:
+                       return html_component_text_decoration{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT:
+                       return html_component_line_height{value};
+               // Layout
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN:
+                       return html_component_margin{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP:
+                       return html_component_margin_top{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM:
+                       return html_component_margin_bottom{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT:
+                       return html_component_margin_left{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT:
+                       return html_component_margin_right{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING:
+                       return html_component_padding{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP:
+                       return html_component_padding_top{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM:
+                       return html_component_padding_bottom{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT:
+                       return html_component_padding_left{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT:
+                       return html_component_padding_right{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER:
+                       return html_component_border{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR:
+                       return html_component_border_color{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH:
+                       return html_component_border_width{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE:
+                       return html_component_border_style{value};
+               // Display
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY:
+                       return html_component_display{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY:
+                       return html_component_visibility{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY:
+                       return html_component_opacity{value};
+               // Dimensions
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH:
+                       return html_component_min_width{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH:
+                       return html_component_max_width{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT:
+                       return html_component_min_height{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT:
+                       return html_component_max_height{value};
+               // Table
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING:
+                       return html_component_cellpadding{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING:
+                       return html_component_cellspacing{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN:
+                       return html_component_valign{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN:
+                       return html_component_align{value};
+               // Form
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE:
+                       return html_component_type{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE:
+                       return html_component_value{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER:
+                       return html_component_placeholder{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED:
+                       return html_component_disabled{};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY:
+                       return html_component_readonly{};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED:
+                       return html_component_checked{};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED:
+                       return html_component_selected{};
+               // Link & media
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET:
+                       return html_component_target{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE:
+                       return html_component_title{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC:
+                       return html_component_src{value};
+               // Meta
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET:
+                       return html_component_charset{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT:
+                       return html_component_content{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV:
+                       return html_component_http_equiv{value};
+               // Accessibility
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE:
+                       return html_component_role{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX:
+                       return html_component_tabindex{value};
+               // Background
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND:
+                       return html_component_background{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE:
+                       return html_component_background_image{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR:
+                       return html_component_background_color{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT:
+                       return html_component_background_repeat{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION:
+                       return html_component_background_position{value};
+               // Email tracking
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK:
+                       return html_component_data_track{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID:
+                       return html_component_data_id{value};
+               case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL:
+                       return html_component_data_url{value};
                default:
                        return html_component_unknown{name, value};
                }
@@ -240,6 +427,424 @@ auto html_component_from_string(std::string_view name, std::string_view value) -
        }
 }
 
+using component_extractor_func = std::function<std::optional<std::string_view>(const html_tag *)>;
+static const auto component_extractors = frozen::make_unordered_map<frozen::string, component_extractor_func>(
+       {
+               // Basic components
+               {"name", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_name>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"href", [](const html_tag *tag) { return tag->find_href(); }},
+               {"src", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_src>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"class", [](const html_tag *tag) { return tag->find_class(); }},
+               {"id", [](const html_tag *tag) { return tag->find_id(); }},
+               {"style", [](const html_tag *tag) { return tag->find_style(); }},
+               {"alt", [](const html_tag *tag) { return tag->find_alt(); }},
+               {"rel", [](const html_tag *tag) { return tag->find_rel(); }},
+               {"color", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_color>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"bgcolor", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_bgcolor>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Numeric components (return string representation)
+               {"width", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_width>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"height", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_height>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"size", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_size>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+
+               // Boolean components
+               {"hidden", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        return tag->is_hidden() ? std::optional<std::string_view>{"true"} : std::nullopt;
+                }},
+
+               // Typography components
+               {"font-family", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_font_family>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"font-size", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_font_size>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"font-weight", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_font_weight>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"font-style", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_font_style>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"text-align", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_text_align>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"text-decoration", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_text_decoration>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"line-height", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_line_height>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+
+               // Layout components
+               {"margin", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_margin>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"margin-top", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_margin_top>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"margin-bottom", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_margin_bottom>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"margin-left", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_margin_left>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"margin-right", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_margin_right>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"padding", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_padding>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"padding-top", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_padding_top>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"padding-bottom", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_padding_bottom>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"padding-left", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_padding_left>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"padding-right", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_padding_right>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"border", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_border>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"border-color", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_border_color>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"border-width", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_border_width>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"border-style", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_border_style>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Display components
+               {"display", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_display>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"visibility", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_visibility>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"opacity", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_opacity>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+
+               // Additional dimensions
+               {"min-width", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_min_width>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"max-width", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_max_width>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"min-height", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_min_height>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"max-height", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_max_height>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+
+               // Table components
+               {"cellpadding", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_cellpadding>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"cellspacing", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_cellspacing>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+               {"valign", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_valign>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"align", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_align>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Form components
+               {"type", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_type>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"value", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_value>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"placeholder", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_placeholder>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"disabled", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_disabled>()) {
+                                return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+                        }
+                        return std::nullopt;
+                }},
+               {"readonly", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_readonly>()) {
+                                return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+                        }
+                        return std::nullopt;
+                }},
+               {"checked", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_checked>()) {
+                                return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+                        }
+                        return std::nullopt;
+                }},
+               {"selected", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_selected>()) {
+                                return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Link & media components
+               {"target", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_target>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"title", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_title>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Meta components
+               {"charset", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_charset>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"content", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_content>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"http-equiv", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_http_equiv>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Accessibility components
+               {"role", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_role>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"tabindex", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_tabindex>()) {
+                                return comp.value()->get_string_value();
+                        }
+                        return std::nullopt;
+                }},
+
+               // Background components
+               {"background", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_background>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"background-image", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_background_image>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"background-color", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_background_color>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"background-repeat", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_background_repeat>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"background-position", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_background_position>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+
+               // Email tracking components
+               {"data-track", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_data_track>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"data-id", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_data_id>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+               {"data-url", [](const html_tag *tag) -> std::optional<std::string_view> {
+                        if (auto comp = tag->find_component<html_component_data_url>()) {
+                                return comp.value()->value;
+                        }
+                        return std::nullopt;
+                }},
+       });
+
+auto html_tag::find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>
+{ // Looks attr_name up in component_extractors and returns what the matching extractor yields for this tag
+       auto it = component_extractors.find(attr_name); // attr_name is used exactly as passed; nothing is normalised here
+       if (it != component_extractors.end()) {
+               return it->second(this); // the extractor itself may return std::nullopt when the tag lacks that component
+       }
+
+       // No extractor is registered under attr_name: defer to find_unknown_component()
+       return find_unknown_component(attr_name);
+}
+
 enum tag_parser_state {
        parse_start = 0,
        parse_name,
@@ -839,57 +1444,66 @@ html_process_img_tag(rspamd_mempool_t *pool,
        img = rspamd_mempool_alloc0_type(pool, struct html_image);
        img->tag = tag;
 
-       // Process HREF component
-       if (auto href_value = tag->find_href()) {
-               if (href_value->size() > 0) {
-                       rspamd_ftok_t fstr;
-                       fstr.begin = href_value->data();
-                       fstr.len = href_value->size();
-                       img->src = rspamd_mempool_ftokdup(pool, &fstr);
+       // Process SRC component (preferred for img tags) or HREF component (fallback)
+       std::optional<std::string_view> href_value;
+
+       // Try SRC first (standard for img tags)
+       if (auto src_comp = tag->find_component<html_component_src>()) {
+               href_value = src_comp.value()->value;
+       }
+       // Fallback to HREF (for backward compatibility or non-standard usage)
+       else if (auto href_comp = tag->find_href()) {
+               href_value = href_comp;
+       }
+
+       if (href_value && href_value->size() > 0) {
+               rspamd_ftok_t fstr;
+               fstr.begin = href_value->data();
+               fstr.len = href_value->size();
+               img->src = rspamd_mempool_ftokdup(pool, &fstr);
 
-                       if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(),
-                                                                                                                                 "cid:", sizeof("cid:") - 1) == 0) {
-                               /* We have an embedded image */
-                               img->src += sizeof("cid:") - 1;
-                               img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+               if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(),
+                                                                                                                         "cid:", sizeof("cid:") - 1) == 0) {
+                       /* We have an embedded image */
+                       img->src += sizeof("cid:") - 1;
+                       img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+               }
+               else {
+                       if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(),
+                                                                                                                                  "data:", sizeof("data:") - 1) == 0) {
+                               /* We have an embedded image in HTML tag */
+                               img->flags |=
+                                       (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+                               html_process_data_image(pool, img, *href_value);
+                               hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
                        }
                        else {
-                               if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(),
-                                                                                                                                          "data:", sizeof("data:") - 1) == 0) {
-                                       /* We have an embedded image in HTML tag */
-                                       img->flags |=
-                                               (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
-                                       html_process_data_image(pool, img, *href_value);
-                                       hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
-                               }
-                               else {
-                                       img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
-                                       if (img->src) {
-
-                                               std::string_view cpy{*href_value};
-                                               auto maybe_url = html_process_url(pool, cpy);
-
-                                               if (maybe_url) {
-                                                       img->url = maybe_url.value();
-                                                       struct rspamd_url *existing;
-
-                                                       img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
-                                                       existing = rspamd_url_set_add_or_return(url_set,
-                                                                                                                                       img->url);
-
-                                                       if (existing && existing != img->url) {
-                                                               /*
-                                                                * We have some other URL that could be
-                                                                * found, e.g. from another part. However,
-                                                                * we still want to set an image flag on it
-                                                                */
-                                                               existing->flags |= img->url->flags;
-                                                               existing->count++;
-                                                       }
-                                                       else if (part_urls) {
-                                                               /* New url */
-                                                               g_ptr_array_add(part_urls, img->url);
-                                                       }
+                               img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+                               if (img->src) {
+
+                                       std::string_view cpy{*href_value};
+                                       auto maybe_url = html_process_url(pool, cpy);
+
+                                       if (maybe_url) {
+                                               img->url = maybe_url.value();
+                                               struct rspamd_url *existing;
+
+                                               img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+                                               existing = rspamd_url_set_add_or_return(url_set,
+                                                                                                                               img->url);
+
+                                               if (existing && existing != img->url) {
+                                                       /*
+                                                        * We have some other URL that could be
+                                                        * found, e.g. from another part. However,
+                                                        * we still want to set an image flag on it
+                                                        */
+                                                       existing->flags |= img->url->flags;
+                                                       existing->count++;
+                                               }
+                                               else if (part_urls) {
+                                                       /* New url */
+                                                       g_ptr_array_add(part_urls, img->url);
                                                }
                                        }
                                }
index a6b366a91372ccd8490365aae8c7e15f387420e4..5948b91bf0ea134ddb81448a25a8463c1fe83ef3 100644 (file)
@@ -36,7 +36,7 @@ namespace rspamd::html {
 struct html_content; /* Forward declaration */
 
 // Internal enum for mapping (not exposed in public API)
-enum class html_component_type : std::uint8_t {
+enum class html_component_enum_type : std::uint8_t {
        RSPAMD_HTML_COMPONENT_NAME = 0,
        RSPAMD_HTML_COMPONENT_HREF,
        RSPAMD_HTML_COMPONENT_COLOR,
@@ -50,6 +50,72 @@ enum class html_component_type : std::uint8_t {
        RSPAMD_HTML_COMPONENT_ALT,
        RSPAMD_HTML_COMPONENT_ID,
        RSPAMD_HTML_COMPONENT_HIDDEN,
+       // Typography
+       RSPAMD_HTML_COMPONENT_FONT_FAMILY,
+       RSPAMD_HTML_COMPONENT_FONT_SIZE,
+       RSPAMD_HTML_COMPONENT_FONT_WEIGHT,
+       RSPAMD_HTML_COMPONENT_FONT_STYLE,
+       RSPAMD_HTML_COMPONENT_TEXT_ALIGN,
+       RSPAMD_HTML_COMPONENT_TEXT_DECORATION,
+       RSPAMD_HTML_COMPONENT_LINE_HEIGHT,
+       // Layout & positioning
+       RSPAMD_HTML_COMPONENT_MARGIN,
+       RSPAMD_HTML_COMPONENT_MARGIN_TOP,
+       RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM,
+       RSPAMD_HTML_COMPONENT_MARGIN_LEFT,
+       RSPAMD_HTML_COMPONENT_MARGIN_RIGHT,
+       RSPAMD_HTML_COMPONENT_PADDING,
+       RSPAMD_HTML_COMPONENT_PADDING_TOP,
+       RSPAMD_HTML_COMPONENT_PADDING_BOTTOM,
+       RSPAMD_HTML_COMPONENT_PADDING_LEFT,
+       RSPAMD_HTML_COMPONENT_PADDING_RIGHT,
+       RSPAMD_HTML_COMPONENT_BORDER,
+       RSPAMD_HTML_COMPONENT_BORDER_COLOR,
+       RSPAMD_HTML_COMPONENT_BORDER_WIDTH,
+       RSPAMD_HTML_COMPONENT_BORDER_STYLE,
+       // Display & visibility
+       RSPAMD_HTML_COMPONENT_DISPLAY,
+       RSPAMD_HTML_COMPONENT_VISIBILITY,
+       RSPAMD_HTML_COMPONENT_OPACITY,
+       // Dimensions
+       RSPAMD_HTML_COMPONENT_MIN_WIDTH,
+       RSPAMD_HTML_COMPONENT_MAX_WIDTH,
+       RSPAMD_HTML_COMPONENT_MIN_HEIGHT,
+       RSPAMD_HTML_COMPONENT_MAX_HEIGHT,
+       // Table attributes
+       RSPAMD_HTML_COMPONENT_CELLPADDING,
+       RSPAMD_HTML_COMPONENT_CELLSPACING,
+       RSPAMD_HTML_COMPONENT_VALIGN,
+       RSPAMD_HTML_COMPONENT_ALIGN,
+       // Form attributes
+       RSPAMD_HTML_COMPONENT_TYPE,
+       RSPAMD_HTML_COMPONENT_VALUE,
+       RSPAMD_HTML_COMPONENT_PLACEHOLDER,
+       RSPAMD_HTML_COMPONENT_DISABLED,
+       RSPAMD_HTML_COMPONENT_READONLY,
+       RSPAMD_HTML_COMPONENT_CHECKED,
+       RSPAMD_HTML_COMPONENT_SELECTED,
+       // Link & media
+       RSPAMD_HTML_COMPONENT_TARGET,
+       RSPAMD_HTML_COMPONENT_TITLE,
+       RSPAMD_HTML_COMPONENT_SRC,
+       // Meta & document
+       RSPAMD_HTML_COMPONENT_CHARSET,
+       RSPAMD_HTML_COMPONENT_CONTENT,
+       RSPAMD_HTML_COMPONENT_HTTP_EQUIV,
+       // Accessibility
+       RSPAMD_HTML_COMPONENT_ROLE,
+       RSPAMD_HTML_COMPONENT_TABINDEX,
+       // Background
+       RSPAMD_HTML_COMPONENT_BACKGROUND,
+       RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE,
+       RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR,
+       RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT,
+       RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION,
+       // Email-specific tracking
+       RSPAMD_HTML_COMPONENT_DATA_TRACK,
+       RSPAMD_HTML_COMPONENT_DATA_ID,
+       RSPAMD_HTML_COMPONENT_DATA_URL,
 };
 
 // Forward declarations for component types
@@ -71,25 +137,704 @@ struct html_component_unknown;
 // Base interface for all components
 struct html_component_base {
        virtual ~html_component_base() = default;
-       virtual std::string_view get_string_value() const = 0;
+       virtual constexpr std::string_view get_string_value() const = 0; // pure virtual: every component type supplies its own textual value
 };
 
 // String-based components
 struct html_component_name : html_component_base {
        std::string_view value;
-       explicit html_component_name(std::string_view v)
+       explicit constexpr html_component_name(std::string_view v) // stores the view as given; no characters are copied
                : value(v)
        {
        }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_href : html_component_base { // string-valued component; the type name suggests it carries href
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_href(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_style : html_component_base { // string-valued component; the type name suggests it carries style
+       std::string_view value; // non-owning view; the text is kept verbatim and is not parsed in this type
+       explicit constexpr html_component_style(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_class : html_component_base { // string-valued component; the type name suggests it carries class
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_class(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_rel : html_component_base { // string-valued component; the type name suggests it carries rel
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_rel(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_alt : html_component_base { // string-valued component; the type name suggests it carries alt
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_alt(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_id : html_component_base { // string-valued component; the type name suggests it carries id
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_id(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+// Color components (could be extended to parse actual colors)
+struct html_component_color : html_component_base { // colour kept as raw text; this type does no colour parsing
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_color(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_bgcolor : html_component_base { // background colour kept as raw text; this type does no colour parsing
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_bgcolor(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+// Numeric components
+struct html_component_width : html_component_base { // keeps the raw text plus an optional parsed unsigned value
+       std::string_view raw_value; // text exactly as passed to the constructor (non-owning view)
+       std::optional<std::uint32_t> numeric_value; // stays empty unless rspamd_strtoul returned a truthy result
+
+       explicit html_component_width(const std::string_view v) // parses v once, at construction
+               : raw_value(v)
+       {
+               unsigned long val;
+               if (rspamd_strtoul(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::uint32_t>(val); // narrowing cast: high bits are dropped where unsigned long is wider than 32 bits
+               }
+       }
+
+       constexpr std::string_view get_string_value() const override // raw text, whether or not parsing succeeded
+       {
+               return raw_value;
+       }
+       constexpr std::optional<std::uint32_t> get_numeric_value() const // parsed value, or std::nullopt
+       {
+               return numeric_value;
+       }
+};
+
+struct html_component_height : html_component_base { // keeps the raw text plus an optional parsed unsigned value
+       std::string_view raw_value; // text exactly as passed to the constructor (non-owning view)
+       std::optional<std::uint32_t> numeric_value; // stays empty unless rspamd_strtoul returned a truthy result
+
+       explicit html_component_height(const std::string_view v) // parses v once, at construction
+               : raw_value(v)
+       {
+               unsigned long val;
+               if (rspamd_strtoul(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::uint32_t>(val); // narrowing cast: high bits are dropped where unsigned long is wider than 32 bits
+               }
+       }
+
+       constexpr std::string_view get_string_value() const override // raw text, whether or not parsing succeeded
+       {
+               return raw_value;
+       }
+       constexpr std::optional<std::uint32_t> get_numeric_value() const // parsed value, or std::nullopt
+       {
+               return numeric_value;
+       }
+};
+
+struct html_component_size : html_component_base { // keeps the raw text plus an optional parsed unsigned value
+       std::string_view raw_value; // text exactly as passed to the constructor (non-owning view)
+       std::optional<std::uint32_t> numeric_value; // stays empty unless rspamd_strtoul returned a truthy result
+
+       explicit html_component_size(std::string_view v) // parses v once, at construction
+               : raw_value(v)
+       {
+               unsigned long val;
+               if (rspamd_strtoul(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::uint32_t>(val); // narrowing cast: high bits are dropped where unsigned long is wider than 32 bits
+               }
+       }
+
+       constexpr std::string_view get_string_value() const override // raw text, whether or not parsing succeeded
+       {
+               return raw_value;
+       }
+       constexpr std::optional<std::uint32_t> get_numeric_value() const // parsed value, or std::nullopt
+       {
+               return numeric_value;
+       }
+};
+
+// Boolean/flag component
+struct html_component_hidden : html_component_base { // flag component: records presence only, carries no text payload
+       bool present; // the only constructor below always sets this to true
+       explicit constexpr html_component_hidden()
+               : present(true)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // literal "true" or "false", chosen from present
+       {
+               return present ? "true" : "false";
+       }
+       constexpr bool is_present() const // exposes the flag directly
+       {
+               return present;
+       }
+};
+
+// Unknown component with both name and value
+struct html_component_unknown : html_component_base { // keeps both a name and a value, each as a non-owning view
+       std::string_view name; // readable through get_name()
+       std::string_view value; // readable through get_string_value()
+
+       constexpr html_component_unknown(std::string_view n, std::string_view v) // stores both views exactly as given
+               : name(n), value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // returns value only; the name is not included
+       {
+               return value;
+       }
+       constexpr std::string_view get_name() const // returns the stored name unchanged
+       {
+               return name;
+       }
+};
+
+// Typography components
+struct html_component_font_family : html_component_base { // string-valued component; the type name suggests it carries font-family
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_font_family(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_font_size : html_component_base { // keeps the raw text plus an optional parsed unsigned value
+       std::string_view raw_value; // text exactly as passed to the constructor (non-owning view)
+       std::optional<std::uint32_t> numeric_value; // stays empty unless rspamd_strtoul returned a truthy result
+
+       explicit html_component_font_size(std::string_view v) // parses v once, at construction
+               : raw_value(v)
+       {
+               unsigned long val;
+               if (rspamd_strtoul(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::uint32_t>(val); // narrowing cast: high bits are dropped where unsigned long is wider than 32 bits
+               }
+       }
+
+       constexpr std::string_view get_string_value() const override // raw text, whether or not parsing succeeded
+       {
+               return raw_value;
+       }
+       constexpr std::optional<std::uint32_t> get_numeric_value() const // parsed value, or std::nullopt
+       {
+               return numeric_value;
+       }
+};
+
+struct html_component_font_weight : html_component_base { // string-valued component; the type name suggests it carries font-weight
+       std::string_view value; // non-owning view; kept as text, no numeric parsing in this type
+       explicit constexpr html_component_font_weight(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_font_style : html_component_base { // string-valued component; the type name suggests it carries font-style
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_font_style(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_text_align : html_component_base { // string-valued component; the type name suggests it carries text-align
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_text_align(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_text_decoration : html_component_base { // string-valued component; the type name suggests it carries text-decoration
+       std::string_view value; // non-owning view, stored exactly as passed in
+       explicit constexpr html_component_text_decoration(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+struct html_component_line_height : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_line_height(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val;
+               if (rspamd_strtoul(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Layout components (most are string-based for flexibility)
+struct html_component_margin : html_component_base { // string-valued component; the type name suggests it carries margin
+       std::string_view value; // non-owning view; kept as text, not split or parsed in this type
+       explicit constexpr html_component_margin(std::string_view v) // explicit: a bare string_view never converts implicitly
+               : value(v)
+       {
+       }
+       constexpr std::string_view get_string_value() const override // hands back value unchanged
+       {
+               return value;
+       }
+};
+
+// Top margin component: a plain string payload kept as a non-owning view of
+// the text handed to the constructor (no parsing is performed).
+struct html_component_margin_top : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_margin_top(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Bottom margin component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_margin_bottom : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_margin_bottom(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Left margin component: a plain string payload kept as a non-owning view of
+// the text handed to the constructor (no parsing is performed).
+struct html_component_margin_left : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_margin_left(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Right margin component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_margin_right : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_margin_right(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Padding component: a plain string payload kept as a non-owning view of the
+// text handed to the constructor (no parsing is performed).
+struct html_component_padding : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_padding(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Top padding component: a plain string payload kept as a non-owning view of
+// the text handed to the constructor (no parsing is performed).
+struct html_component_padding_top : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_padding_top(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Bottom padding component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_padding_bottom : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_padding_bottom(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Left padding component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_padding_left : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_padding_left(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Right padding component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_padding_right : html_component_base {
+       std::string_view value;
+
+       explicit constexpr html_component_padding_right(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Border component: a plain string payload kept as a non-owning view of the
+// text handed to the constructor (no parsing is performed).
+struct html_component_border : html_component_base {
+       std::string_view value;
+
+       explicit html_component_border(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Border color component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (the color is not parsed here).
+struct html_component_border_color : html_component_base {
+       std::string_view value;
+
+       explicit html_component_border_color(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Border width component: keeps the raw text and, when rspamd_strtoul accepts
+// it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_border_width : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_border_width(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Border style component: a plain string payload kept as a non-owning view
+// of the text handed to the constructor (no parsing is performed).
+struct html_component_border_style : html_component_base {
+       std::string_view value;
+
+       explicit html_component_border_style(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Display components
+// Display component: a plain string payload kept as a non-owning view of the
+// text handed to the constructor (no parsing is performed).
+struct html_component_display : html_component_base {
+       std::string_view value;
+
+       explicit html_component_display(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Visibility component: a plain string payload kept as a non-owning view of
+// the text handed to the constructor (no parsing is performed).
+struct html_component_visibility : html_component_base {
+       std::string_view value;
+
+       explicit html_component_visibility(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
+
+// Opacity component: keeps the raw text and, when it parses as a float in the
+// closed range [0, 1], the parsed value.
+struct html_component_opacity : html_component_base {
+       std::string_view raw_value;
+       std::optional<float> numeric_value;
+
+       explicit html_component_opacity(std::string_view v)
+               : raw_value(v)
+       {
+               // Nothing to parse; also avoids touching a possibly null data().
+               if (v.empty()) {
+                       return;
+               }
+
+               // std::strtof needs a NUL-terminated string, but a string_view has no
+               // terminator, so parsing v.data() directly could read past the end of
+               // the view. Parse a bounded, terminated copy instead; input longer
+               // than the buffer is parsed from its leading characters only.
+               char buf[64];
+               const auto len = v.copy(buf, sizeof(buf) - 1);
+               buf[len] = '\0';
+
+               char *endptr = nullptr;
+               const auto val = std::strtof(buf, &endptr);
+               // endptr == buf means no characters were converted.
+               if (endptr != buf && val >= 0.0f && val <= 1.0f) {
+                       numeric_value = val;
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed opacity, or std::nullopt when not a number within [0, 1].
+       std::optional<float> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Additional dimension components
+// Minimum width component: keeps the raw text and, when rspamd_strtoul
+// accepts it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_min_width : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_min_width(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Maximum width component: keeps the raw text and, when rspamd_strtoul
+// accepts it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_max_width : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_max_width(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Minimum height component: keeps the raw text and, when rspamd_strtoul
+// accepts it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_min_height : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_min_height(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Maximum height component: keeps the raw text and, when rspamd_strtoul
+// accepts it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_max_height : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_max_height(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
        std::string_view get_string_value() const override
        {
-               return value;
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
        }
 };
 
-struct html_component_href : html_component_base {
+// Table components
+// Cell padding component: keeps the raw text and, when rspamd_strtoul accepts
+// it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_cellpadding : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_cellpadding(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+// Cell spacing component: keeps the raw text and, when rspamd_strtoul accepts
+// it and the result fits into 32 bits, the parsed unsigned number.
+struct html_component_cellspacing : html_component_base {
+       std::string_view raw_value;
+       std::optional<std::uint32_t> numeric_value;
+
+       explicit html_component_cellspacing(std::string_view v)
+               : raw_value(v)
+       {
+               unsigned long val = 0;
+               // unsigned long may be wider than 32 bits: accept only values that
+               // survive the narrowing round trip instead of silently truncating.
+               if (rspamd_strtoul(v.data(), v.size(), &val) &&
+                       static_cast<std::uint32_t>(val) == val) {
+                       numeric_value = static_cast<std::uint32_t>(val);
+               }
+       }
+
+       // Raw text exactly as passed to the constructor.
+       std::string_view get_string_value() const override
+       {
+               return raw_value;
+       }
+       // Parsed number, or std::nullopt when parsing failed or was out of range.
+       std::optional<std::uint32_t> get_numeric_value() const
+       {
+               return numeric_value;
+       }
+};
+
+struct html_component_valign : html_component_base {
        std::string_view value;
-       explicit html_component_href(std::string_view v)
+       explicit html_component_valign(std::string_view v)
                : value(v)
        {
        }
@@ -99,9 +844,9 @@ struct html_component_href : html_component_base {
        }
 };
 
-struct html_component_style : html_component_base {
+struct html_component_align : html_component_base {
        std::string_view value;
-       explicit html_component_style(std::string_view v)
+       explicit html_component_align(std::string_view v)
                : value(v)
        {
        }
@@ -111,9 +856,10 @@ struct html_component_style : html_component_base {
        }
 };
 
-struct html_component_class : html_component_base {
+// Form components
+struct html_component_type : html_component_base {
        std::string_view value;
-       explicit html_component_class(std::string_view v)
+       explicit html_component_type(std::string_view v)
                : value(v)
        {
        }
@@ -123,9 +869,9 @@ struct html_component_class : html_component_base {
        }
 };
 
-struct html_component_rel : html_component_base {
+struct html_component_value : html_component_base {
        std::string_view value;
-       explicit html_component_rel(std::string_view v)
+       explicit html_component_value(std::string_view v)
                : value(v)
        {
        }
@@ -135,9 +881,9 @@ struct html_component_rel : html_component_base {
        }
 };
 
-struct html_component_alt : html_component_base {
+struct html_component_placeholder : html_component_base {
        std::string_view value;
-       explicit html_component_alt(std::string_view v)
+       explicit html_component_placeholder(std::string_view v)
                : value(v)
        {
        }
@@ -147,9 +893,75 @@ struct html_component_alt : html_component_base {
        }
 };
 
-struct html_component_id : html_component_base {
+// Boolean form components
+// Flag component (disabled): carries no text, only a presence bit that the
+// constructor always sets to true.
+struct html_component_disabled : html_component_base {
+       bool present;
+
+       explicit constexpr html_component_disabled()
+               : present{true}
+       {
+       }
+
+       // Textual form of the flag: "true" when set, "false" otherwise.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               if (present) {
+                       return "true";
+               }
+               return "false";
+       }
+
+       // Direct access to the flag.
+       constexpr auto is_present() const -> bool
+       {
+               return present;
+       }
+};
+
+// Flag component (readonly): carries no text, only a presence bit that the
+// constructor always sets to true.
+struct html_component_readonly : html_component_base {
+       bool present;
+
+       explicit constexpr html_component_readonly()
+               : present{true}
+       {
+       }
+
+       // Textual form of the flag: "true" when set, "false" otherwise.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               if (present) {
+                       return "true";
+               }
+               return "false";
+       }
+
+       // Direct access to the flag.
+       constexpr auto is_present() const -> bool
+       {
+               return present;
+       }
+};
+
+// Flag component (checked): carries no text, only a presence bit that the
+// constructor always sets to true.
+struct html_component_checked : html_component_base {
+       bool present;
+
+       explicit constexpr html_component_checked()
+               : present{true}
+       {
+       }
+
+       // Textual form of the flag: "true" when set, "false" otherwise.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               if (present) {
+                       return "true";
+               }
+               return "false";
+       }
+
+       // Direct access to the flag.
+       constexpr auto is_present() const -> bool
+       {
+               return present;
+       }
+};
+
+// Flag component (selected): carries no text, only a presence bit that the
+// constructor always sets to true.
+struct html_component_selected : html_component_base {
+       bool present;
+
+       explicit constexpr html_component_selected()
+               : present{true}
+       {
+       }
+
+       // Textual form of the flag: "true" when set, "false" otherwise.
+       constexpr auto get_string_value() const -> std::string_view override
+       {
+               if (present) {
+                       return "true";
+               }
+               return "false";
+       }
+
+       // Direct access to the flag.
+       constexpr auto is_present() const -> bool
+       {
+               return present;
+       }
+};
+
+// Link & media components
+struct html_component_target : html_component_base {
        std::string_view value;
-       explicit html_component_id(std::string_view v)
+       explicit html_component_target(std::string_view v)
                : value(v)
        {
        }
@@ -159,10 +971,9 @@ struct html_component_id : html_component_base {
        }
 };
 
-// Color components (could be extended to parse actual colors)
-struct html_component_color : html_component_base {
+struct html_component_title : html_component_base {
        std::string_view value;
-       explicit html_component_color(std::string_view v)
+       explicit html_component_title(std::string_view v)
                : value(v)
        {
        }
@@ -172,9 +983,9 @@ struct html_component_color : html_component_base {
        }
 };
 
-struct html_component_bgcolor : html_component_base {
+struct html_component_src : html_component_base {
        std::string_view value;
-       explicit html_component_bgcolor(std::string_view v)
+       explicit html_component_src(std::string_view v)
                : value(v)
        {
        }
@@ -184,40 +995,66 @@ struct html_component_bgcolor : html_component_base {
        }
 };
 
-// Numeric components
-struct html_component_width : html_component_base {
-       std::string_view raw_value;
-       std::optional<std::uint32_t> numeric_value;
+// Meta components
+// Charset component: a plain string payload kept as a non-owning view of the
+// text handed to the constructor (no parsing is performed).
+struct html_component_charset : html_component_base {
+       std::string_view value;
+
+       explicit html_component_charset(std::string_view v)
+               : value{v}
+       {
+       }
+
+       // Returns the stored view unchanged.
+       auto get_string_value() const -> std::string_view override
+       {
+               return value;
+       }
+};
 
-       explicit html_component_width(const std::string_view v)
-               : raw_value(v)
+// String component (content): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_content : html_component_base {
+       std::string_view value;
+       explicit html_component_content(std::string_view v)
+               : value(v)
        {
-               unsigned long val;
-               if (rspamd_strtoul(v.data(), v.size(), &val)) {
-                       numeric_value = static_cast<std::uint32_t>(val);
-               }
        }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
+       {
+               return value;
+       }
+};
 
+// String component (http_equiv): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_http_equiv : html_component_base {
+       std::string_view value;
+       explicit html_component_http_equiv(std::string_view v)
+               : value(v)
+       {
+       }
+       // Returns the stored view unchanged.
        std::string_view get_string_value() const override
        {
-               return raw_value;
+               return value;
        }
-       std::optional<std::uint32_t> get_numeric_value() const
+};
+
+// Accessibility components
+// String component (role): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_role : html_component_base {
+       std::string_view value;
+       explicit html_component_role(std::string_view v)
+               : value(v)
        {
-               return numeric_value;
+       }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
+       {
+               return value;
        }
 };
 
-struct html_component_height : html_component_base {
+struct html_component_tabindex : html_component_base {
        std::string_view raw_value;
-       std::optional<std::uint32_t> numeric_value;
+       std::optional<std::int32_t> numeric_value;
 
-       explicit html_component_height(const std::string_view v)
+       explicit html_component_tabindex(std::string_view v)
                : raw_value(v)
        {
-               unsigned long val;
-               if (rspamd_strtoul(v.data(), v.size(), &val)) {
-                       numeric_value = static_cast<std::uint32_t>(val);
+               long val;
+               if (rspamd_strtol(v.data(), v.size(), &val)) {
+                       numeric_value = static_cast<std::int32_t>(val);
                }
        }
 
@@ -225,68 +1062,107 @@ struct html_component_height : html_component_base {
        {
                return raw_value;
        }
-       std::optional<std::uint32_t> get_numeric_value() const
+       std::optional<std::int32_t> get_numeric_value() const
        {
                return numeric_value;
        }
 };
 
-struct html_component_size : html_component_base {
-       std::string_view raw_value;
-       std::optional<std::uint32_t> numeric_value;
-
-       explicit html_component_size(std::string_view v)
-               : raw_value(v)
+// Background components
+// String component (background): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_background : html_component_base {
+       std::string_view value;
+       explicit html_component_background(std::string_view v)
+               : value(v)
        {
-               unsigned long val;
-               if (rspamd_strtoul(v.data(), v.size(), &val)) {
-                       numeric_value = static_cast<std::uint32_t>(val);
-               }
        }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
+       {
+               return value;
+       }
+};
 
+// String component (background_image): stores a non-owning view of the
+// constructor argument; no parsing is performed.
+struct html_component_background_image : html_component_base {
+       std::string_view value;
+       explicit html_component_background_image(std::string_view v)
+               : value(v)
+       {
+       }
+       // Returns the stored view unchanged.
        std::string_view get_string_value() const override
        {
-               return raw_value;
+               return value;
        }
-       std::optional<std::uint32_t> get_numeric_value() const
+};
+
+// String component (background_color): stores a non-owning view of the
+// constructor argument; the color text is not parsed here.
+struct html_component_background_color : html_component_base {
+       std::string_view value;
+       explicit html_component_background_color(std::string_view v)
+               : value(v)
        {
-               return numeric_value;
+       }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
+       {
+               return value;
        }
 };
 
-// Boolean/flag component
-struct html_component_hidden : html_component_base {
-       bool present;
-       explicit html_component_hidden()
-               : present(true)
+// String component (background_repeat): stores a non-owning view of the
+// constructor argument; no parsing is performed.
+struct html_component_background_repeat : html_component_base {
+       std::string_view value;
+       explicit html_component_background_repeat(std::string_view v)
+               : value(v)
        {
        }
+       // Returns the stored view unchanged.
        std::string_view get_string_value() const override
        {
-               return present ? "true" : "false";
+               return value;
+       }
+};
+
+// String component (background_position): stores a non-owning view of the
+// constructor argument; no parsing is performed.
+struct html_component_background_position : html_component_base {
+       std::string_view value;
+       explicit html_component_background_position(std::string_view v)
+               : value(v)
+       {
        }
-       bool is_present() const
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
        {
-               return present;
+               return value;
        }
 };
 
-// Unknown component with both name and value
-struct html_component_unknown : html_component_base {
-       std::string_view name;
+// Email tracking components
+// String component (data_track): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_data_track : html_component_base {
        std::string_view value;
+       explicit html_component_data_track(std::string_view v)
+               : value(v)
+       {
+       }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
+       {
+               return value;
+       }
+};
 
-       html_component_unknown(std::string_view n, std::string_view v)
-               : name(n), value(v)
+// String component (data_id): stores a non-owning view of the constructor
+// argument; no parsing is performed.
+struct html_component_data_id : html_component_base {
+       std::string_view value;
+       explicit html_component_data_id(std::string_view v)
+               : value(v)
        {
        }
+       // Returns the stored view unchanged.
        std::string_view get_string_value() const override
        {
                return value;
        }
-       std::string_view get_name() const
+};
+
+// String component (data_url): stores a non-owning view of the constructor
+// argument; the URL text is not parsed or validated here.
+struct html_component_data_url : html_component_base {
+       std::string_view value;
+       explicit html_component_data_url(std::string_view v)
+               : value(v)
+       {
+       }
+       // Returns the stored view unchanged.
+       std::string_view get_string_value() const override
        {
-               return name;
+               return value;
        }
 };
 
@@ -305,6 +1181,73 @@ using html_tag_component = std::variant<
        html_component_alt,
        html_component_id,
        html_component_hidden,
+       // Typography
+       html_component_font_family,
+       html_component_font_size,
+       html_component_font_weight,
+       html_component_font_style,
+       html_component_text_align,
+       html_component_text_decoration,
+       html_component_line_height,
+       // Layout
+       html_component_margin,
+       html_component_margin_top,
+       html_component_margin_bottom,
+       html_component_margin_left,
+       html_component_margin_right,
+       html_component_padding,
+       html_component_padding_top,
+       html_component_padding_bottom,
+       html_component_padding_left,
+       html_component_padding_right,
+       html_component_border,
+       html_component_border_color,
+       html_component_border_width,
+       html_component_border_style,
+       // Display
+       html_component_display,
+       html_component_visibility,
+       html_component_opacity,
+       // Dimensions
+       html_component_min_width,
+       html_component_max_width,
+       html_component_min_height,
+       html_component_max_height,
+       // Table
+       html_component_cellpadding,
+       html_component_cellspacing,
+       html_component_valign,
+       html_component_align,
+       // Form
+       html_component_type,
+       html_component_value,
+       html_component_placeholder,
+       html_component_disabled,
+       html_component_readonly,
+       html_component_checked,
+       html_component_selected,
+       // Link & media
+       html_component_target,
+       html_component_title,
+       html_component_src,
+       // Meta
+       html_component_charset,
+       html_component_content,
+       html_component_http_equiv,
+       // Accessibility
+       html_component_role,
+       html_component_tabindex,
+       // Background
+       html_component_background,
+       html_component_background_image,
+       html_component_background_color,
+       html_component_background_repeat,
+       html_component_background_position,
+       // Email tracking
+       html_component_data_track,
+       html_component_data_id,
+       html_component_data_url,
+       // Unknown
        html_component_unknown>;
 
 /**
@@ -356,7 +1299,7 @@ struct html_tag {
 
        // Template method to find component by type
        template<typename T>
-       auto find_component() const -> std::optional<const T *>
+       constexpr auto find_component() const -> std::optional<const T *>
        {
                for (const auto &comp: components) {
                        if (std::holds_alternative<T>(comp)) {
@@ -367,7 +1310,7 @@ struct html_tag {
        }
 
        // Helper methods for common component access
-       auto find_href() const -> std::optional<std::string_view>
+       constexpr auto find_href() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_href>()) {
                        return comp.value()->value;
@@ -375,7 +1318,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_class() const -> std::optional<std::string_view>
+       constexpr auto find_class() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_class>()) {
                        return comp.value()->value;
@@ -383,7 +1326,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_id() const -> std::optional<std::string_view>
+       constexpr auto find_id() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_id>()) {
                        return comp.value()->value;
@@ -391,7 +1334,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_width() const -> std::optional<std::uint32_t>
+       constexpr auto find_width() const -> std::optional<std::uint32_t>
        {
                if (auto comp = find_component<html_component_width>()) {
                        return comp.value()->get_numeric_value();
@@ -399,7 +1342,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_height() const -> std::optional<std::uint32_t>
+       constexpr auto find_height() const -> std::optional<std::uint32_t>
        {
                if (auto comp = find_component<html_component_height>()) {
                        return comp.value()->get_numeric_value();
@@ -407,7 +1350,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_style() const -> std::optional<std::string_view>
+       constexpr auto find_style() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_style>()) {
                        return comp.value()->value;
@@ -415,7 +1358,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_alt() const -> std::optional<std::string_view>
+       constexpr auto find_alt() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_alt>()) {
                        return comp.value()->value;
@@ -423,7 +1366,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto find_rel() const -> std::optional<std::string_view>
+       constexpr auto find_rel() const -> std::optional<std::string_view>
        {
                if (auto comp = find_component<html_component_rel>()) {
                        return comp.value()->value;
@@ -431,12 +1374,12 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto is_hidden() const -> bool
+       constexpr auto is_hidden() const -> bool
        {
                return find_component<html_component_hidden>().has_value();
        }
 
-       auto find_unknown_component(std::string_view attr_name) const -> std::optional<std::string_view>
+       constexpr auto find_unknown_component(std::string_view attr_name) const -> std::optional<std::string_view>
        {
                for (const auto &comp: components) {
                        if (std::holds_alternative<html_component_unknown>(comp)) {
@@ -449,7 +1392,7 @@ struct html_tag {
                return std::nullopt;
        }
 
-       auto get_unknown_components() const -> std::vector<std::pair<std::string_view, std::string_view>>
+       constexpr auto get_unknown_components() const -> std::vector<std::pair<std::string_view, std::string_view>>
        {
                std::vector<std::pair<std::string_view, std::string_view>> unknown_attrs;
                for (const auto &comp: components) {
@@ -470,57 +1413,8 @@ struct html_tag {
                }
        }
 
-       // Find any component by attribute name (for Lua bindings and generic access)
-       auto find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>
-       {
-               // Check known component types first using their helper methods
-               if (attr_name == "href") return find_href();
-               if (attr_name == "class") return find_class();
-               if (attr_name == "id") return find_id();
-               if (attr_name == "style") return find_style();
-               if (attr_name == "alt") return find_alt();
-               if (attr_name == "rel") return find_rel();
-               if (attr_name == "hidden") return is_hidden() ? std::optional<std::string_view>{"true"} : std::nullopt;
-
-               // Handle numeric components that need string conversion
-               if (attr_name == "width") {
-                       if (auto comp = find_component<html_component_width>()) {
-                               return comp.value()->get_string_value();
-                       }
-               }
-               if (attr_name == "height") {
-                       if (auto comp = find_component<html_component_height>()) {
-                               return comp.value()->get_string_value();
-                       }
-               }
-               if (attr_name == "size") {
-                       if (auto comp = find_component<html_component_size>()) {
-                               return comp.value()->get_string_value();
-                       }
-               }
-
-               // Handle color components
-               if (attr_name == "color") {
-                       if (auto comp = find_component<html_component_color>()) {
-                               return comp.value()->value;
-                       }
-               }
-               if (attr_name == "bgcolor") {
-                       if (auto comp = find_component<html_component_bgcolor>()) {
-                               return comp.value()->value;
-                       }
-               }
-
-               // Handle name component
-               if (attr_name == "name") {
-                       if (auto comp = find_component<html_component_name>()) {
-                               return comp.value()->value;
-                       }
-               }
-
-               // Finally check unknown components
-               return find_unknown_component(attr_name);
-       }
+       // Find any component by attribute name.
+       // Declaration only: this change removes the previous inline body, and the
+       // new definition is not visible in this header section.
+       // Returns an optional string view; presumably std::nullopt when no
+       // component matches attr_name - confirm against the out-of-line definition.
+       auto find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>;
 
        auto clear(void) -> void
        {