return nullptr;
}
-auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>
+auto html_component_from_string(std::string_view name, std::string_view value) -> html_tag_component // Builds the typed component for attribute `name` holding `value`
{
- auto known_component_it = html_components_map.find(st);
+ auto known_component_it = html_components_map.find(name);
if (known_component_it != html_components_map.end()) {
- return known_component_it->second;
+ switch (known_component_it->second) { // pick the variant alternative matching the mapped type
+ case html_component_type::RSPAMD_HTML_COMPONENT_NAME:
+ return html_component_name{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_HREF:
+ return html_component_href{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_COLOR:
+ return html_component_color{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR:
+ return html_component_bgcolor{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_STYLE:
+ return html_component_style{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_CLASS:
+ return html_component_class{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_WIDTH:
+ return html_component_width{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT:
+ return html_component_height{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_SIZE:
+ return html_component_size{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_REL:
+ return html_component_rel{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_ALT:
+ return html_component_alt{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_ID:
+ return html_component_id{value};
+ case html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN:
+ return html_component_hidden{}; // flag component: the value is not stored
+ default:
+ return html_component_unknown{name, value}; // mapped type without a dedicated alternative
+ }
}
else {
- return std::nullopt;
+ return html_component_unknown{name, value}; // name is not present in html_components_map
}
}
struct tag_content_parser_state {
tag_parser_state cur_state = parse_start;
std::string buf;
- std::optional<html_component_type> cur_component;
+ std::string attr_name;// Store current attribute name; filled by store_component_name, consumed and cleared by store_component_value
void reset()
{
cur_state = parse_start;
buf.clear();
- cur_component = std::nullopt;
+ attr_name.clear(); // drop any pending attribute name together with the buffer
}
};
auto state = parser_env.cur_state;
/*
- * Stores tag component if it doesn't exist, performing copy of the
- * value + decoding of the entities
- * Parser env is set to clear the current html attribute fields (saved_p and
- * cur_component)
+ * Stores tag component creating the appropriate variant type
+ * Parser env is cleared after storing
*/
auto store_component_value = [&]() -> void {
- if (parser_env.cur_component) {
+ if (!parser_env.attr_name.empty()) { // nothing is stored unless an attribute name was collected
+ std::string_view attr_name_view, value_view;
- if (parser_env.buf.empty()) {
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{});
+ // Store attribute name in persistent memory (pool copy, because parser_env.attr_name is cleared below)
+ if (!parser_env.attr_name.empty()) { // NOTE(review): always true here, duplicates the enclosing check
+ auto *name_storage = rspamd_mempool_alloc_buffer(pool, parser_env.attr_name.size());
+ memcpy(name_storage, parser_env.attr_name.data(), parser_env.attr_name.size());
+ attr_name_view = {name_storage, parser_env.attr_name.size()};
}
- else {
- /* We need to copy buf to a persistent storage */
- auto *s = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
- if (parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_ID ||
- parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
- /* Lowercase */
- rspamd_str_copy_lc(parser_env.buf.data(), s, parser_env.buf.size());
+ // Store value in persistent memory if not empty (an empty buffer leaves value_view empty)
+ if (!parser_env.buf.empty()) {
+ auto *value_storage = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
+
+ // Lowercase for id and class attributes (exact, case-sensitive comparison of the attribute name)
+ if (parser_env.attr_name == "id" || parser_env.attr_name == "class") {
+ rspamd_str_copy_lc(parser_env.buf.data(), value_storage, parser_env.buf.size());
}
else {
- memcpy(s, parser_env.buf.data(), parser_env.buf.size());
+ memcpy(value_storage, parser_env.buf.data(), parser_env.buf.size());
}
- auto sz = rspamd_html_decode_entitles_inplace(s, parser_env.buf.size());
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{s, sz});
+ auto sz = rspamd_html_decode_entitles_inplace(value_storage, parser_env.buf.size()); // sz is used as the length of the stored value
+ value_view = {value_storage, sz};
}
+
+ // Create the appropriate component variant (unrecognised names become html_component_unknown)
+ auto component = html_component_from_string(attr_name_view, value_view);
+ tag->components.emplace_back(std::move(component));
}
parser_env.buf.clear();
- parser_env.cur_component = std::nullopt;
+ parser_env.attr_name.clear();
};
auto store_component_name = [&]() -> bool {
decode_html_entitles_inplace(parser_env.buf);
- auto known_component_it = html_components_map.find(std::string_view{parser_env.buf});
+ parser_env.attr_name = parser_env.buf; // keep the decoded name; it is classified later in store_component_value
parser_env.buf.clear();
-
- if (known_component_it != html_components_map.end()) {
- parser_env.cur_component = known_component_it->second;
-
- return true;
- }
- else {
- parser_env.cur_component = std::nullopt;
- }
-
- return false;
+ return true; // every attribute name is accepted now, whether or not it is in html_components_map
};
auto store_value_character = [&](bool lc) -> void {
struct html_tag *tag,
struct html_content *hc) -> std::optional<struct rspamd_url *>
{
- auto found_href_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_HREF);
+ auto found_href_maybe = tag->find_href();
if (found_href_maybe) {
/* Check base url */
img = rspamd_mempool_alloc0_type(pool, struct html_image);
img->tag = tag;
- for (const auto ¶m: tag->components) {
+ // Process HREF component
+ if (auto href_value = tag->find_href()) {
+ if (href_value->size() > 0) {
+ rspamd_ftok_t fstr;
+ fstr.begin = href_value->data();
+ fstr.len = href_value->size();
+ img->src = rspamd_mempool_ftokdup(pool, &fstr);
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HREF) {
- /* Check base url */
- const auto &href_value = param.value;
-
- if (href_value.size() > 0) {
- rspamd_ftok_t fstr;
- fstr.begin = href_value.data();
- fstr.len = href_value.size();
- img->src = rspamd_mempool_ftokdup(pool, &fstr);
-
- if (href_value.size() > sizeof("cid:") - 1 && memcmp(href_value.data(),
- "cid:", sizeof("cid:") - 1) == 0) {
- /* We have an embedded image */
- img->src += sizeof("cid:") - 1;
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+ if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(),
+ "cid:", sizeof("cid:") - 1) == 0) {
+ /* We have an embedded image */
+ img->src += sizeof("cid:") - 1;
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+ }
+ else {
+ if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(),
+ "data:", sizeof("data:") - 1) == 0) {
+ /* We have an embedded image in HTML tag */
+ img->flags |=
+ (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+ html_process_data_image(pool, img, *href_value);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
}
else {
- if (href_value.size() > sizeof("data:") - 1 && memcmp(href_value.data(),
- "data:", sizeof("data:") - 1) == 0) {
- /* We have an embedded image in HTML tag */
- img->flags |=
- (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
- html_process_data_image(pool, img, href_value);
- hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
- }
- else {
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
- if (img->src) {
-
- std::string_view cpy{href_value};
- auto maybe_url = html_process_url(pool, cpy);
-
- if (maybe_url) {
- img->url = maybe_url.value();
- struct rspamd_url *existing;
-
- img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
- existing = rspamd_url_set_add_or_return(url_set,
- img->url);
-
- if (existing && existing != img->url) {
- /*
- * We have some other URL that could be
- * found, e.g. from another part. However,
- * we still want to set an image flag on it
- */
- existing->flags |= img->url->flags;
- existing->count++;
- }
- else if (part_urls) {
- /* New url */
- g_ptr_array_add(part_urls, img->url);
- }
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+ if (img->src) {
+
+ std::string_view cpy{*href_value};
+ auto maybe_url = html_process_url(pool, cpy);
+
+ if (maybe_url) {
+ img->url = maybe_url.value();
+ struct rspamd_url *existing;
+
+ img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+ existing = rspamd_url_set_add_or_return(url_set,
+ img->url);
+
+ if (existing && existing != img->url) {
+ /*
+ * We have some other URL that could be
+ * found, e.g. from another part. However,
+ * we still want to set an image flag on it
+ */
+ existing->flags |= img->url->flags;
+ existing->count++;
+ }
+ else if (part_urls) {
+ /* New url */
+ g_ptr_array_add(part_urls, img->url);
}
}
}
}
}
}
+ }
+ // Process numeric dimensions using the new helper methods
+ if (auto height = tag->find_height()) {
+ img->height = height.value();
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT) {
- unsigned long val;
-
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->height = val;
- }
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_WIDTH) {
- unsigned long val;
-
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->width = val;
- }
-
- /* TODO: rework to css at some time */
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- if (img->height == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "height", sizeof("height") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("height") - 1);
+ if (auto width = tag->find_width()) {
+ img->width = width.value();
+ }
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->height = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ // Process style component for dimensions
+ if (auto style_value = tag->find_style()) {
+ if (img->height == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "height", sizeof("height") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("height") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->height = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
- if (img->width == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "width", sizeof("width") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("width") - 1);
-
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->width = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ }
+ if (img->width == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "width", sizeof("width") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("width") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->width = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
khash_t(rspamd_url_hash) * url_set,
GPtrArray *part_urls) -> void
{
- auto found_rel_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_REL);
+ auto found_rel_maybe = tag->find_rel();
if (found_rel_maybe) {
if (found_rel_maybe.value() == "icon") {
std::optional<css::css_value> maybe_fgcolor, maybe_bgcolor;
bool hidden = false;
- for (const auto ¶m: tag->components) {
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_COLOR) {
- maybe_fgcolor = css::css_value::maybe_color_from_string(param.value);
- }
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR) {
- maybe_bgcolor = css::css_value::maybe_color_from_string(param.value);
- }
+ // Process color components
+ if (auto color_comp = tag->find_component<html_component_color>()) {
+ maybe_fgcolor = css::css_value::maybe_color_from_string(color_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- tag->block = rspamd::css::parse_css_declaration(pool, param.value);
- }
+ if (auto bgcolor_comp = tag->find_component<html_component_bgcolor>()) {
+ maybe_bgcolor = css::css_value::maybe_color_from_string(bgcolor_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN) {
- hidden = true;
- }
+ // Process style component
+ if (auto style_value = tag->find_style()) {
+ tag->block = rspamd::css::parse_css_declaration(pool, *style_value);
}
+ // Check if hidden
+ hidden = tag->is_hidden();
+
if (!tag->block) {
tag->block = html_block::undefined_html_block_pool(pool);
}
}
else if (tag->id == Tag_IMG) {
/* Process ALT if presented */
- auto maybe_alt = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
+ auto maybe_alt = tag->find_alt();
if (maybe_alt) {
if (!hc->parsed.empty() && !g_ascii_isspace(hc->parsed.back())) {
overflow_input = true;
}
- auto new_tag = [&](int flags = 0) -> struct html_tag *
- {
-
+ auto new_tag = [&](int flags = 0) -> struct html_tag * {
if (hc->all_tags.size() > rspamd::html::max_tags) {
hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
/* Leftover after content */
switch (state) {
case tags_limit_overflow:
- html_append_parsed(hc, {c, (std::size_t)(end - c)},
+ html_append_parsed(hc, {c, (std::size_t) (end - c)},
false, end - start, hc->parsed);
break;
default:
}
return hc->all_tags.size();
-}
\ No newline at end of file
+}
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
#include <cstdint>
#include "html_tags.h"
+#include "libutil/str_util.h"
struct rspamd_url;
struct html_image;
struct html_content; /* Forward declaration */
+// Internal enum for mapping (not exposed in public API)
enum class html_component_type : std::uint8_t {
RSPAMD_HTML_COMPONENT_NAME = 0,
RSPAMD_HTML_COMPONENT_HREF,
RSPAMD_HTML_COMPONENT_HIDDEN,
};
+// Forward declarations for component types
+struct html_component_name;
+struct html_component_href;
+struct html_component_color;
+struct html_component_bgcolor;
+struct html_component_style;
+struct html_component_class;
+struct html_component_width;
+struct html_component_height;
+struct html_component_size;
+struct html_component_rel;
+struct html_component_alt;
+struct html_component_id;
+struct html_component_hidden;
+struct html_component_unknown;
+
+// Base interface for all components: each one can report its value as text
+struct html_component_base {
+ virtual ~html_component_base() = default;
+ virtual std::string_view get_string_value() const = 0; // textual value of the component, as defined by each derived type
+};
+
+// String-based components
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_NAME.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_name : html_component_base {
+    std::string_view value;
+
+    explicit html_component_name(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_HREF.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_href : html_component_base {
+    std::string_view value;
+
+    explicit html_component_href(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_STYLE.
+ * The raw style text is kept as a view; it is not parsed here.
+ */
+struct html_component_style : html_component_base {
+    std::string_view value;
+
+    explicit html_component_style(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_CLASS.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_class : html_component_base {
+    std::string_view value;
+
+    explicit html_component_class(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_REL.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_rel : html_component_base {
+    std::string_view value;
+
+    explicit html_component_rel(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_ALT.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_alt : html_component_base {
+    std::string_view value;
+
+    explicit html_component_alt(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_ID.
+ * Only a view of the value is kept; the characters are not copied.
+ */
+struct html_component_id : html_component_base {
+    std::string_view value;
+
+    explicit html_component_id(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+// Color components (could be extended to parse actual colors)
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_COLOR.
+ * The colour is kept as unparsed text (a view, not a copy).
+ */
+struct html_component_color : html_component_base {
+    std::string_view value;
+
+    explicit html_component_color(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+/**
+ * Typed wrapper for an attribute classified as RSPAMD_HTML_COMPONENT_BGCOLOR.
+ * The colour is kept as unparsed text (a view, not a copy).
+ */
+struct html_component_bgcolor : html_component_base {
+    std::string_view value;
+
+    explicit html_component_bgcolor(std::string_view v)
+        : value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+};
+
+// Numeric components
+/**
+ * Component for an attribute classified as RSPAMD_HTML_COMPONENT_WIDTH.
+ * Keeps the raw text as a view and, when rspamd_strtoul reports success and
+ * the result fits into 32 bits, the numeric value as well.
+ */
+struct html_component_width : html_component_base {
+    std::string_view raw_value;
+    std::optional<std::uint32_t> numeric_value;
+
+    explicit html_component_width(const std::string_view v)
+        : raw_value(v)
+    {
+        unsigned long val = 0;
+        /*
+         * Values above UINT32_MAX used to be silently truncated by the cast;
+         * they are now treated as "no numeric value".
+         */
+        if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+            numeric_value = static_cast<std::uint32_t>(val);
+        }
+    }
+
+    /* Raw attribute text exactly as it was passed to the constructor */
+    std::string_view get_string_value() const override
+    {
+        return raw_value;
+    }
+    /* Parsed value, or std::nullopt when parsing failed or overflowed */
+    std::optional<std::uint32_t> get_numeric_value() const
+    {
+        return numeric_value;
+    }
+};
+
+/**
+ * Component for an attribute classified as RSPAMD_HTML_COMPONENT_HEIGHT.
+ * Keeps the raw text as a view and, when rspamd_strtoul reports success and
+ * the result fits into 32 bits, the numeric value as well.
+ */
+struct html_component_height : html_component_base {
+    std::string_view raw_value;
+    std::optional<std::uint32_t> numeric_value;
+
+    explicit html_component_height(const std::string_view v)
+        : raw_value(v)
+    {
+        unsigned long val = 0;
+        /*
+         * Values above UINT32_MAX used to be silently truncated by the cast;
+         * they are now treated as "no numeric value".
+         */
+        if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+            numeric_value = static_cast<std::uint32_t>(val);
+        }
+    }
+
+    /* Raw attribute text exactly as it was passed to the constructor */
+    std::string_view get_string_value() const override
+    {
+        return raw_value;
+    }
+    /* Parsed value, or std::nullopt when parsing failed or overflowed */
+    std::optional<std::uint32_t> get_numeric_value() const
+    {
+        return numeric_value;
+    }
+};
+
+/**
+ * Component for an attribute classified as RSPAMD_HTML_COMPONENT_SIZE.
+ * Keeps the raw text as a view and, when rspamd_strtoul reports success and
+ * the result fits into 32 bits, the numeric value as well.
+ */
+struct html_component_size : html_component_base {
+    std::string_view raw_value;
+    std::optional<std::uint32_t> numeric_value;
+
+    explicit html_component_size(std::string_view v)
+        : raw_value(v)
+    {
+        unsigned long val = 0;
+        /*
+         * Values above UINT32_MAX used to be silently truncated by the cast;
+         * they are now treated as "no numeric value".
+         */
+        if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+            numeric_value = static_cast<std::uint32_t>(val);
+        }
+    }
+
+    /* Raw attribute text exactly as it was passed to the constructor */
+    std::string_view get_string_value() const override
+    {
+        return raw_value;
+    }
+    /* Parsed value, or std::nullopt when parsing failed or overflowed */
+    std::optional<std::uint32_t> get_numeric_value() const
+    {
+        return numeric_value;
+    }
+};
+
+// Boolean/flag component
+/**
+ * Flag-like component for RSPAMD_HTML_COMPONENT_HIDDEN: it carries no
+ * attribute value, only the fact that the attribute was seen.
+ */
+struct html_component_hidden : html_component_base {
+    bool present;
+
+    explicit html_component_hidden()
+        : present{true}
+    {
+    }
+
+    /* Textual form of the flag: "true" when set, "false" otherwise */
+    std::string_view get_string_value() const override
+    {
+        if (present) {
+            return "true";
+        }
+        return "false";
+    }
+
+    bool is_present() const { return present; }
+};
+
+// Unknown component with both name and value
+/**
+ * Catch-all component for attributes without a dedicated type: keeps views
+ * of both the attribute name and its value (neither is copied).
+ */
+struct html_component_unknown : html_component_base {
+    std::string_view name;
+    std::string_view value;
+
+    html_component_unknown(std::string_view n, std::string_view v)
+        : name{n}, value{v}
+    {
+    }
+
+    std::string_view get_string_value() const override { return value; }
+
+    std::string_view get_name() const { return name; }
+};
+
+// The variant type that holds all possible components; html_component_unknown is the catch-all alternative
+using html_tag_component = std::variant<
+ html_component_name,
+ html_component_href,
+ html_component_color,
+ html_component_bgcolor,
+ html_component_style,
+ html_component_class,
+ html_component_width,
+ html_component_height,
+ html_component_size,
+ html_component_rel,
+ html_component_alt,
+ html_component_id,
+ html_component_hidden,
+ html_component_unknown>;
+
+/**
+ * Returns component variant from a string
+ * Recognised attribute names yield the matching typed alternative; any
+ * other name yields html_component_unknown carrying both name and value.
+ * The component types store string views only, so the character data
+ * behind `name` and `value` must outlive the returned component.
+ * @param name attribute name
+ * @param value attribute value
+ * @return variant component
+ */
+auto html_component_from_string(std::string_view name, std::string_view value) -> html_tag_component;
+
/* Public tags flags */
/* XML tag */
#define FL_XML (1u << CM_USER_SHIFT)
#define FL_COMMENT (1 << (CM_USER_SHIFT + 6))
#define FL_VIRTUAL (1 << (CM_USER_SHIFT + 7))
-/**
- * Returns component type from a string
- * @param st
- * @return
- */
-auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>;
-
using html_tag_extra_t = std::variant<std::monostate, struct rspamd_url *, struct html_image *>;
-struct html_tag_component {
- html_component_type type;
- std::string_view value;
-
- html_tag_component(html_component_type type, std::string_view value)
- : type(type), value(value)
- {
- }
-};
/* Pairing closing tag representation */
struct html_closing_tag {
std::vector<struct html_tag *> children;
struct html_tag *parent;
- auto find_component(html_component_type what) const -> std::optional<std::string_view>
+ // Template method to find component by type: yields a pointer to the first stored alternative of type T, or std::nullopt if none is stored
+ template<typename T>
+ auto find_component() const -> std::optional<const T *>
{
for (const auto &comp: components) {
- if (comp.type == what) {
- return comp.value;
+ if (std::holds_alternative<T>(comp)) {
+ return &std::get<T>(comp);
}
}
+ return std::nullopt;
+ }
+ // Helper methods for common component access; find_href returns the value of the first href component, or std::nullopt
+ auto find_href() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_href>()) {
+ return comp.value()->value;
+ }
return std::nullopt;
}
- auto find_component(std::optional<html_component_type> what) const -> std::optional<std::string_view>
+ auto find_class() const -> std::optional<std::string_view> // value of the first class component, or std::nullopt
{
- if (what) {
- return find_component(what.value());
+ if (auto comp = find_component<html_component_class>()) {
+ return comp.value()->value;
}
+ return std::nullopt;
+ }
+
+ /* Value of the first id component, or std::nullopt when the tag has none */
+ auto find_id() const -> std::optional<std::string_view>
+ {
+     const auto found = find_component<html_component_id>();
+     if (!found) {
+         return std::nullopt;
+     }
+     return (*found)->value;
+ }
+ /*
+  * Numeric value of the first width component; std::nullopt when the tag
+  * has no width component or that component holds no numeric value.
+  */
+ auto find_width() const -> std::optional<std::uint32_t>
+ {
+     const auto found = find_component<html_component_width>();
+     if (!found) {
+         return std::nullopt;
+     }
+     return (*found)->get_numeric_value();
+ }
+
+ /*
+  * Numeric value of the first height component; std::nullopt when the tag
+  * has no height component or that component holds no numeric value.
+  */
+ auto find_height() const -> std::optional<std::uint32_t>
+ {
+     const auto found = find_component<html_component_height>();
+     if (!found) {
+         return std::nullopt;
+     }
+     return (*found)->get_numeric_value();
+ }
+
+ auto find_style() const -> std::optional<std::string_view> // raw text of the first style component, or std::nullopt
+ {
+ if (auto comp = find_component<html_component_style>()) {
+ return comp.value()->value;
+ }
return std::nullopt;
}
+ /* Value of the first alt component, or std::nullopt when the tag has none */
+ auto find_alt() const -> std::optional<std::string_view>
+ {
+     const auto found = find_component<html_component_alt>();
+     if (!found) {
+         return std::nullopt;
+     }
+     return (*found)->value;
+ }
+
+ /* Value of the first rel component, or std::nullopt when the tag has none */
+ auto find_rel() const -> std::optional<std::string_view>
+ {
+     const auto found = find_component<html_component_rel>();
+     if (!found) {
+         return std::nullopt;
+     }
+     return (*found)->value;
+ }
+
+ /* True when the tag stores a hidden component */
+ auto is_hidden() const -> bool
+ {
+     return static_cast<bool>(find_component<html_component_hidden>());
+ }
+
+ /*
+  * Value of the first html_component_unknown whose name equals attr_name
+  * (exact comparison), or std::nullopt when there is no such component.
+  */
+ auto find_unknown_component(std::string_view attr_name) const -> std::optional<std::string_view>
+ {
+     for (const auto &comp: components) {
+         const auto *candidate = std::get_if<html_component_unknown>(&comp);
+         if (candidate != nullptr && candidate->name == attr_name) {
+             return candidate->value;
+         }
+     }
+     return std::nullopt;
+ }
+
+ /*
+  * Collects (name, value) pairs of all html_component_unknown entries, in
+  * the order in which they are stored in `components`.
+  */
+ auto get_unknown_components() const -> std::vector<std::pair<std::string_view, std::string_view>>
+ {
+     std::vector<std::pair<std::string_view, std::string_view>> collected;
+     for (const auto &comp: components) {
+         if (const auto *entry = std::get_if<html_component_unknown>(&comp)) {
+             collected.emplace_back(entry->name, entry->value);
+         }
+     }
+     return collected;
+ }
+
+ /*
+  * Generic visitor method: applies `visitor` to every stored component, in
+  * storage order, through std::visit.
+  * The visitor is deliberately passed as an lvalue on each iteration.
+  * Forwarding it inside the loop would hand an rvalue visitor to std::visit
+  * repeatedly, so a visitor whose call operator consumes it (rvalue
+  * qualified) would be used after being consumed from the second element on.
+  */
+ template<typename Visitor>
+ auto visit_components(Visitor &&visitor) const -> void
+ {
+     for (const auto &comp: components) {
+         std::visit(visitor, comp);
+     }
+ }
+
+ /*
+  * Find any component by attribute name (for Lua bindings and generic
+  * access). Names with a dedicated component type are resolved through that
+  * type: "hidden" yields "true" when the flag is present, and
+  * width/height/size yield their raw text. Whenever no typed component
+  * answers, the unknown components are searched by name.
+  */
+ auto find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>
+ {
+     /* Attributes answered directly by a string-returning helper */
+     if (attr_name == "href") {
+         return find_href();
+     }
+     if (attr_name == "class") {
+         return find_class();
+     }
+     if (attr_name == "id") {
+         return find_id();
+     }
+     if (attr_name == "style") {
+         return find_style();
+     }
+     if (attr_name == "alt") {
+         return find_alt();
+     }
+     if (attr_name == "rel") {
+         return find_rel();
+     }
+     if (attr_name == "hidden") {
+         if (is_hidden()) {
+             return std::optional<std::string_view>{"true"};
+         }
+         return std::nullopt;
+     }
+
+     /* Typed components without a helper: a miss falls through to the unknown scan */
+     if (attr_name == "width") {
+         const auto found = find_component<html_component_width>();
+         if (found) {
+             return (*found)->get_string_value();
+         }
+     }
+     else if (attr_name == "height") {
+         const auto found = find_component<html_component_height>();
+         if (found) {
+             return (*found)->get_string_value();
+         }
+     }
+     else if (attr_name == "size") {
+         const auto found = find_component<html_component_size>();
+         if (found) {
+             return (*found)->get_string_value();
+         }
+     }
+     else if (attr_name == "color") {
+         const auto found = find_component<html_component_color>();
+         if (found) {
+             return (*found)->value;
+         }
+     }
+     else if (attr_name == "bgcolor") {
+         const auto found = find_component<html_component_bgcolor>();
+         if (found) {
+             return (*found)->value;
+         }
+     }
+     else if (attr_name == "name") {
+         const auto found = find_component<html_component_name>();
+         if (found) {
+             return (*found)->value;
+         }
+     }
+
+     /* Last resort: attributes stored as html_component_unknown */
+     return find_unknown_component(attr_name);
+ }
+
auto clear(void) -> void
{
id = Tag_UNKNOWN;