git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Css: Start css selectors parsing logic
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 16 Mar 2021 15:58:34 +0000 (15:58 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 16 Mar 2021 15:58:34 +0000 (15:58 +0000)
src/libserver/css/css_parser.cxx
src/libserver/css/css_parser.hxx
src/libserver/css/css_selector.cxx
src/libserver/css/css_selector.hxx
src/libserver/css/css_tokeniser.hxx

index 0198491220d4628f64ef7fe4644700c2624ef972..915e04f123a3f35e4fb0507c359a1764c744b25d 100644 (file)
@@ -83,9 +83,6 @@ auto css_consumed_block::token_type_str(void) const -> const char *
        case parser_tag_type::css_component:
                ret = "component";
                break;
-       case parser_tag_type::css_selector:
-               ret = "selector";
-               break;
        case parser_tag_type::css_eof_block:
                ret = "eof";
                break;
index 034c135c62f497043f1f456fb771bffd483273a6..f51960b71d874c12a659ab19ac4426bf71ecf0b0 100644 (file)
@@ -46,7 +46,6 @@ public:
                css_function,
                css_function_arg,
                css_component,
-               css_selector,
                css_eof_block,
        };
        using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
index 2f1f29aca902400bf522512dd0454318772db4f0..a1ed0e234b735952878538c89deea57db8532a32 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "css_selector.hxx"
+#include "fmt/core.h"
 
 namespace rspamd::css {
 
@@ -23,9 +24,165 @@ auto process_selector_tokens(rspamd_mempool_t *pool,
        -> selectors_vec
 {
        selectors_vec ret;
+       bool can_continue = true;
+       enum class selector_process_state {
+               selector_parse_start = 0,
+               selector_expect_ident,
+               selector_ident_consumed,
+               selector_ignore_attribute,
+               selector_ignore_function,
+               selector_ignore_combination
+       } state = selector_process_state::selector_parse_start;
+       std::unique_ptr<css_selector> cur_selector;
+
+
+       while (can_continue) {
+               const auto &next_tok = next_token_functor();
+
+               if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
+                       const auto &parser_tok = next_tok.get_token_or_empty();
+
+                       if (state == selector_process_state::selector_parse_start) {
+                               /*
+                                * At the beginning of the parsing we can expect either
+                                * delim or an ident, everything else is discarded for now
+                                */
+                               msg_debug_css("start consume selector");
+
+                               switch (parser_tok.type) {
+                               case css_parser_token::token_type::delim_token: {
+                                       auto delim_c = parser_tok.get_delim();
+
+                                       if (delim_c == '.') {
+                                               cur_selector = std::make_unique<css_selector>(
+                                                               css_selector::selector_type::SELECTOR_CLASS);
+                                               state = selector_process_state::selector_expect_ident;
+                                       }
+                                       else if (delim_c == '#') {
+                                               cur_selector = std::make_unique<css_selector>(
+                                                               css_selector::selector_type::SELECTOR_ID);
+                                               state = selector_process_state::selector_expect_ident;
+                                       }
+                                       else if (delim_c == '*') {
+                                               cur_selector = std::make_unique<css_selector>(
+                                                               css_selector::selector_type::SELECTOR_ALL);
+                                               state = selector_process_state::selector_ident_consumed;
+                                       }
+                                       break;
+                               }
+                               case css_parser_token::token_type::ident_token:
+                                       cur_selector = std::make_unique<css_selector>(
+                                                       css_selector::selector_type::SELECTOR_ELEMENT);
+                                       cur_selector->value = parser_tok.get_string_or_default("");
+                                       state = selector_process_state::selector_ident_consumed;
+                                       break;
+                               case css_parser_token::token_type::hash_token:
+                                       cur_selector = std::make_unique<css_selector>(
+                                                       css_selector::selector_type::SELECTOR_ID);
+                                       cur_selector->value =
+                                                       parser_tok.get_string_or_default("").substr(1);
+                                       state = selector_process_state::selector_ident_consumed;
+                                       break;
+                               default:
+                                       msg_debug_css("cannot consume more of a selector, invalid parser token: %*s; expected start",
+                                                       next_tok.token_type_str());
+                                       can_continue = false;
+                                       break;
+                               }
+                       }
+                       else if (state == selector_process_state::selector_expect_ident) {
+                               /*
+                                * We got something like a selector start, so we expect
+                                * a plain ident
+                                */
+                               if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
+                                       cur_selector->value = parser_tok.get_string_or_default("");
+                                       state = selector_process_state::selector_ident_consumed;
+                               }
+                               else {
+                                       msg_debug_css("cannot consume more of a selector, invalid parser token: %*s; expected ident",
+                                                       next_tok.token_type_str());
+                                       can_continue = false;
+                               }
+                       }
+                       else if (state == selector_process_state::selector_ident_consumed) {
+                               if (parser_tok.type == css_parser_token::token_type::comma_token) {
+                                       /* Got full selector, attach it to the vector and go further */
+                                       msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+                                       ret.push_back(std::move(cur_selector));
+                                       state = selector_process_state::selector_parse_start;
+                               }
+                               else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+                                       /* TODO: implement adjustments */
+                                       state = selector_process_state::selector_ignore_function;
+                               }
+                               else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
+                                       /* TODO: implement attributes checks */
+                                       state = selector_process_state::selector_ignore_attribute;
+                               }
+                               else {
+                                       /* TODO: implement selectors combinations */
+                                       state = selector_process_state::selector_ignore_combination;
+                               }
+                       }
+                       else {
+                               /* Ignore state; ignore all till ',' token or eof token */
+                               if (parser_tok.type == css_parser_token::token_type::comma_token) {
+                                       /* Got full selector, attach it to the vector and go further */
+                                       ret.push_back(std::move(cur_selector));
+                                       state = selector_process_state::selector_parse_start;
+                               }
+                               else {
+                                       auto debug_str = parser_tok.get_string_or_default("");
+                                       msg_debug_css("ignore token %*s", (int)debug_str.size(),
+                                                       debug_str.data());
+                               }
+                       }
+               }
+               else {
+                       /* End of parsing */
+                       if (state == selector_process_state::selector_ident_consumed && cur_selector) {
+                               msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+                               ret.push_back(std::move(cur_selector));
+                       }
+                       can_continue = false;
+               }
+
+       }
 
        return ret; /* copy elision */
 }
 
+auto
+css_selector::debug_str() const -> std::string
+{
+       std::string ret;
+
+       if (type == selector_type::SELECTOR_ID) {
+               ret += "#";
+       }
+       else if (type == selector_type::SELECTOR_CLASS) {
+               ret += ".";
+       }
+       else if (type == selector_type::SELECTOR_ALL) {
+               ret = "*";
+
+               return ret;
+       }
+
+       std::visit([&](auto arg) -> void {
+               using T = std::decay_t<decltype(arg)>;
+
+               if constexpr (std::is_same_v<T, tag_id_t>) {
+                       ret += fmt::format("tag: {}", static_cast<int>(arg));
+               }
+               else {
+                       ret += arg;
+               }
+       }, value);
+
+       return ret;
+}
+
 }
 
index a701e20f683ffbe19fb82ed31fc882a9597376cc..5ed4d54b820c72f03472b049e45aa7f5d826e196 100644 (file)
@@ -37,13 +37,27 @@ namespace rspamd::css {
  */
 struct css_selector {
        enum class selector_type {
-               SELECTOR_ELEMENT, /* e.g. .tr, for this value we use tag_id_t */
-               SELECTOR_CLASS, /* generic class */
-               SELECTOR_ID /* e.g. #id */
+               SELECTOR_ELEMENT, /* e.g. tr, for this value we use tag_id_t */
+               SELECTOR_CLASS, /* generic class, e.g. .class */
+               SELECTOR_ID, /* e.g. #id */
+               SELECTOR_ALL /* * selector */
        };
 
        selector_type type;
-       std::variant<tag_id_t, std::string> value;
+       std::variant<tag_id_t, std::string_view> value;
+
+       /* Conditions for the css selector */
+       /* Dependency on attributes */
+       struct css_attribute_condition {
+               std::string_view attribute;
+               std::string_view op = "";
+               std::string_view value = "";
+       };
+
+       /* General dependency chain */
+       using css_selector_ptr = std::unique_ptr<css_selector>;
+       using css_selector_dep = std::variant<css_attribute_condition, css_selector_ptr>;
+       std::vector<css_selector_dep> dependencies;
 
         auto to_tag(void) const -> std::optional<tag_id_t> {
                if (type == selector_type::SELECTOR_ELEMENT) {
@@ -54,12 +68,17 @@ struct css_selector {
 
        auto to_string(void) const -> std::optional<const std::string_view> {
                if (type == selector_type::SELECTOR_ELEMENT) {
-                       return std::string_view(std::get<std::string>(value));
+                       return std::string_view(std::get<std::string_view>(value));
                }
                return std::nullopt;
        };
+
+       explicit css_selector(selector_type t) : type(t) {}
+
+       auto debug_str(void) const -> std::string;
 };
 
+
 using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
 
 /*
index 53ec4f2db03bb4cc3fb6fe0da6f6a9c9a8678dc2..4a484ecd6e839e62f2f889c1a7ae926c03a863c4 100644 (file)
@@ -111,6 +111,14 @@ struct css_parser_token {
                return def;
        }
 
+       auto get_delim() const -> char {
+               /*
+                * Yields the stored character when `value` holds a char,
+                * otherwise the (char)-1 sentinel
+                */
+               const auto *delim = std::get_if<char>(&value);
+
+               return delim != nullptr ? *delim : static_cast<char>(-1);
+       }
+
        auto get_number_or_default(double def) const -> double {
                if (std::holds_alternative<double>(value)) {
                        auto dbl = std::get<double>(value);