From: Vsevolod Stakhov
Date: Tue, 16 Mar 2021 15:58:34 +0000 (+0000)
Subject: [Project] Css: Start css selectors parsing logic
X-Git-Tag: 3.0~576
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=20b0002b125ec1315ca558b1233d34fb616817ac;p=thirdparty%2Frspamd.git

[Project] Css: Start css selectors parsing logic
---

diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 0198491220..915e04f123 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -83,9 +83,6 @@ auto css_consumed_block::token_type_str(void) const -> const char *
 	case parser_tag_type::css_component:
 		ret = "component";
 		break;
-	case parser_tag_type::css_selector:
-		ret = "selector";
-		break;
 	case parser_tag_type::css_eof_block:
 		ret = "eof";
 		break;
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
index 034c135c62..f51960b71d 100644
--- a/src/libserver/css/css_parser.hxx
+++ b/src/libserver/css/css_parser.hxx
@@ -46,7 +46,6 @@ public:
 		css_function,
 		css_function_arg,
 		css_component,
-		css_selector,
 		css_eof_block,
 	};
 	using consumed_block_ptr = std::unique_ptr;
diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx
index 2f1f29aca9..a1ed0e234b 100644
--- a/src/libserver/css/css_selector.cxx
+++ b/src/libserver/css/css_selector.cxx
@@ -15,6 +15,7 @@
  */
 
 #include "css_selector.hxx"
+#include "fmt/core.h"
 
 namespace rspamd::css {
 
@@ -23,9 +24,180 @@ auto process_selector_tokens(rspamd_mempool_t *pool,
 	-> selectors_vec
 {
 	selectors_vec ret;
+	/*
+	 * Token-driven state machine: pulls blocks from next_token_functor until
+	 * a non-component block arrives or an unexpected token is seen.
+	 * Simple selectors (element, .class, #id, *) are collected into ret;
+	 * attributes, functions and combinations are recognised but only skipped.
+	 */
+	bool can_continue = true;
+	enum class selector_process_state {
+		selector_parse_start = 0,
+		selector_expect_ident,
+		selector_ident_consumed,
+		selector_ignore_attribute,
+		selector_ignore_function,
+		selector_ignore_combination
+	} state = selector_process_state::selector_parse_start;
+	std::unique_ptr cur_selector;
+
+
+	while (can_continue) {
+		const auto &next_tok = next_token_functor();
+
+		if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
+			const auto &parser_tok = next_tok.get_token_or_empty();
+
+			if (state == selector_process_state::selector_parse_start) {
+				/*
+				 * At the beginning of the parsing we can expect either
+				 * delim or an ident, everything else is discarded for now
+				 */
+				msg_debug_css("start consume selector");
+
+				switch (parser_tok.type) {
+				case css_parser_token::token_type::delim_token: {
+					auto delim_c = parser_tok.get_delim();
+
+					if (delim_c == '.') {
+						cur_selector = std::make_unique(
+								css_selector::selector_type::SELECTOR_CLASS);
+						state = selector_process_state::selector_expect_ident;
+					}
+					else if (delim_c == '#') {
+						cur_selector = std::make_unique(
+								css_selector::selector_type::SELECTOR_ID);
+						state = selector_process_state::selector_expect_ident;
+					}
+					else if (delim_c == '*') {
+						cur_selector = std::make_unique(
+								css_selector::selector_type::SELECTOR_ALL);
+						state = selector_process_state::selector_ident_consumed;
+					}
+					break;
+				}
+				case css_parser_token::token_type::ident_token:
+					cur_selector = std::make_unique(
+							css_selector::selector_type::SELECTOR_ELEMENT);
+					cur_selector->value = parser_tok.get_string_or_default("");
+					state = selector_process_state::selector_ident_consumed;
+					break;
+				case css_parser_token::token_type::hash_token: {
+					/* Drop the first character (presumably the leading '#'); */
+					/* substr(1) on an empty value would throw, hence the guard */
+					auto hash_str = parser_tok.get_string_or_default("");
+
+					cur_selector = std::make_unique(
+							css_selector::selector_type::SELECTOR_ID);
+					cur_selector->value = hash_str.empty() ? hash_str : hash_str.substr(1);
+					state = selector_process_state::selector_ident_consumed;
+					break;
+				}
+				default:
+					msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
+							next_tok.token_type_str());
+					can_continue = false;
+					break;
+				}
+			}
+			else if (state == selector_process_state::selector_expect_ident) {
+				/*
+				 * We got something like a selector start, so we expect
+				 * a plain ident
+				 */
+				if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
+					cur_selector->value = parser_tok.get_string_or_default("");
+					state = selector_process_state::selector_ident_consumed;
+				}
+				else {
+					msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
+							next_tok.token_type_str());
+					can_continue = false;
+				}
+			}
+			else if (state == selector_process_state::selector_ident_consumed) {
+				if (parser_tok.type == css_parser_token::token_type::comma_token) {
+					/* Got full selector, attach it to the vector and go further */
+					msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+					ret.push_back(std::move(cur_selector));
+					state = selector_process_state::selector_parse_start;
+				}
+				else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+					/* TODO: implement adjustments */
+					state = selector_process_state::selector_ignore_function;
+				}
+				else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
+					/* TODO: implement attributes checks */
+					state = selector_process_state::selector_ignore_attribute;
+				}
+				else {
+					/* TODO: implement selectors combinations */
+					state = selector_process_state::selector_ignore_combination;
+				}
+			}
+			else {
+				/* Ignore state; ignore all till ',' token or eof token */
+				if (parser_tok.type == css_parser_token::token_type::comma_token) {
+					/* Got full selector, attach it to the vector and go further */
+					ret.push_back(std::move(cur_selector));
+					state = selector_process_state::selector_parse_start;
+				}
+				else {
+					auto debug_str = parser_tok.get_string_or_default("");
+					msg_debug_css("ignore token %*s", (int)debug_str.size(),
+							debug_str.data());
+				}
+			}
+		}
+		else {
+			/* End of input: only a selector in the ident_consumed state is attached */
+			if (state == selector_process_state::selector_ident_consumed && cur_selector) {
+				msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+				ret.push_back(std::move(cur_selector));
+			}
+			can_continue = false;
+		}
+
+	}
 
 	return ret; /* copy elision */
 }
 
+/*
+ * Textual form of a selector for debug output: "*" for SELECTOR_ALL,
+ * otherwise the stored value prefixed by "#" (id) or "." (class);
+ * element selectors get no prefix.
+ */
+auto
+css_selector::debug_str() const -> std::string
+{
+	std::string ret;
+
+	if (type == selector_type::SELECTOR_ID) {
+		ret += "#";
+	}
+	else if (type == selector_type::SELECTOR_CLASS) {
+		ret += ".";
+	}
+	else if (type == selector_type::SELECTOR_ALL) {
+		ret = "*";
+
+		return ret;
+	}
+
+	std::visit([&](auto arg) -> void {
+		using T = std::decay_t;
+
+		if constexpr (std::is_same_v) {
+			ret += fmt::format("tag: {}", static_cast(arg));
+		}
+		else {
+			ret += arg;
+		}
+	}, value);
+
+	return ret;
+}
+
 }
 
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
index a701e20f68..5ed4d54b82 100644
--- a/src/libserver/css/css_selector.hxx
+++ b/src/libserver/css/css_selector.hxx
@@ -37,13 +37,28 @@ namespace rspamd::css {
  */
 struct css_selector {
 	enum class selector_type {
-		SELECTOR_ELEMENT, /* e.g. .tr, for this value we use tag_id_t */
-		SELECTOR_CLASS, /* generic class */
-		SELECTOR_ID /* e.g. #id */
+		SELECTOR_ELEMENT, /* e.g. tr, for this value we use tag_id_t */
+		SELECTOR_CLASS, /* generic class, e.g. .class */
+		SELECTOR_ID, /* e.g. #id */
+		SELECTOR_ALL /* * selector */
 	};
 
 	selector_type type;
-	std::variant value;
+	std::variant value;
+
+	/* Conditions for the css selector */
+	/* Dependency on attributes */
+	/* NOTE(review): string_view members do not own their data; confirm the source outlives the selector */
+	struct css_attribute_condition {
+		std::string_view attribute;
+		std::string_view op = "";
+		std::string_view value = "";
+	};
+
+	/* General dependency chain */
+	using css_selector_ptr = std::unique_ptr;
+	using css_selector_dep = std::variant;
+	std::vector dependencies;
 
 	auto to_tag(void) const -> std::optional {
 		if (type == selector_type::SELECTOR_ELEMENT) {
@@ -54,12 +69,18 @@ struct css_selector {
 
 	auto to_string(void) const -> std::optional {
 		if (type == selector_type::SELECTOR_ELEMENT) {
-			return std::string_view(std::get(value));
+			return std::string_view(std::get(value));
 		}
 		return std::nullopt;
 	};
+
+	explicit css_selector(selector_type t) : type(t) {}
+
+	/* Human readable form of the selector, used in debug log messages */
+	auto debug_str(void) const -> std::string;
 };
 
+
 using selectors_vec = std::vector>;
 
 /*
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index 53ec4f2db0..4a484ecd6e 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -111,6 +111,15 @@ struct css_parser_token {
 		return def;
 	}
 
+	/* Returns the stored delimiter, or (char)-1 when value does not hold the checked alternative */
+	auto get_delim() const -> char {
+		if (std::holds_alternative(value)) {
+			return std::get(value);
+		}
+
+		return (char)-1;
+	}
+
 	auto get_number_or_default(double def) const -> double {
 		if (std::holds_alternative(value)) {
 			auto dbl = std::get(value);