git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Css: Projected a parser
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 2 Feb 2021 18:18:45 +0000 (18:18 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 2 Feb 2021 18:18:45 +0000 (18:18 +0000)
src/libserver/css/css_parser.cxx
src/libserver/css/css_tokeniser.hxx
src/libserver/css/parse_error.hxx

index 54ccccd23cbe3c1ec3978c811a4873cda56c6c93..2133a7b3653b5557c49f150ee0221e058bb483e1 100644 (file)
 
 #include "css_parser.hxx"
 #include "css_tokeniser.hxx"
+#include <vector>
 #include <unicode/utf8.h>
 
 
 namespace rspamd::css {
 
+/*
+ * Represents a block consumed by the parser.
+ *
+ * A block carries a tag describing its kind and exactly one payload:
+ *  - std::monostate: nothing has been stored yet
+ *  - a vector of owned child blocks (compound block)
+ *  - a single lexer token (trivial block)
+ */
+struct css_consumed_block {
+       enum class parser_tag_type : std::uint8_t {
+               css_top_block,
+               css_qualified_rule,
+               css_at_rule,
+               css_simple_block,
+               css_function,
+               css_component
+       };
+
+       using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+       parser_tag_type tag;
+       std::variant<std::monostate,
+               std::vector<consumed_block_ptr>,
+               css_parser_token> content;
+
+       css_consumed_block() = delete;
+
+       /*
+        * Construct an empty block of the given kind. Top level blocks,
+        * qualified rules and simple blocks start with an empty list of
+        * children; all other kinds start as std::monostate.
+        */
+       css_consumed_block(parser_tag_type tag) : tag(tag) {
+               if (tag == parser_tag_type::css_top_block ||
+                       tag == parser_tag_type::css_qualified_rule ||
+                       tag == parser_tag_type::css_simple_block) {
+                       /*
+                        * Pre-allocate capacity for known vector blocks. Only the
+                        * capacity is reserved: constructing the vector with a
+                        * size would insert null children in front of the real
+                        * ones attached later.
+                        */
+                       content.emplace<std::vector<consumed_block_ptr>>().reserve(4);
+               }
+       }
+       /* Construct a block from a single lexer token (for trivial blocks) */
+       explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
+                       tag(tag), content(std::move(tok)) {}
+
+       /*
+        * Attach a new block to the compound block, consuming block inside.
+        *
+        * Returns false (leaving `block` untouched) if this block holds a
+        * single token, true once `block` has been appended to the children.
+        */
+       auto attach_block(consumed_block_ptr &&block) -> bool {
+               if (std::holds_alternative<css_parser_token>(content)) {
+                       /* A single component, cannot attach a block ! */
+                       return false;
+               }
+
+               if (std::holds_alternative<std::monostate>(content)) {
+                       /* Switch from monostate to an empty list of children */
+                       content.emplace<std::vector<consumed_block_ptr>>();
+               }
+
+               std::get<std::vector<consumed_block_ptr>>(content)
+                       .push_back(std::move(block));
+
+               return true;
+       }
+};
+
 class css_parser {
 public:
        css_parser(void) = delete; /* Require mempool to be set for logging */
@@ -31,10 +84,10 @@ public:
        auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> {
                if (state == parser_state::parse_done) {
                        state = parser_state::initial_state;
-                       return std::move (style_object);
+                       return std::move(style_object); /* Parsing is done: the stored stylesheet is moved out to the caller */
                }
 
-               return tl::make_unexpected (error);
+               return tl::make_unexpected(error); /* Not in the parse_done state: the stored error is returned instead */
        }
 
 private:
@@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv)
        bool eof = false;
        css_tokeniser css_tokeniser(pool, sv);
 
-       while (!eof) {
+       auto consumed_blocks = std::make_unique<css_consumed_block>(
+                       css_consumed_block::parser_tag_type::css_top_block);
+       auto rec_level = 0;
+       const auto max_rec = 20;
+
+       auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool { /* Reads one token from the tokeniser; false only if the nesting limit is hit. NOTE(review): `top` is not used yet */
+
+               if (++rec_level > max_rec) { /* Guard against too deep nesting */
+                       error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+                       return false; /* NOTE(review): rec_level is not decremented on this path */
+               }
+
                auto next_token = css_tokeniser.next_token();
 
-               /* Top level parser */
                switch (next_token.type) {
-               case css_parser_token::token_type::eof_token:
-                       eof = true;
+
+               } /* No token type is handled yet: the token read above is discarded */
+
+               --rec_level; /* Leave this nesting level */
+
+               return true;
+       };
+
+       auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool { /* Reads tokens until EOF or a failure, collecting them into a new qualified rule block that is attached to `top` */
+               if (++rec_level > max_rec) { /* Guard against too deep nesting */
+                       msg_err_css("max nesting reached, ignore style");
+                       error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+                       return false; /* NOTE(review): rec_level is not decremented on this path */
+               }
+
+               auto ret = true;
+               auto block = std::make_unique<css_consumed_block>(
+                               css_consumed_block::parser_tag_type::css_qualified_rule); /* Block that receives the content of this rule */
+
+               while (ret && !eof) { /* Stop on EOF or as soon as a nested consumer fails */
+                       auto &&next_token = css_tokeniser.next_token();
+                       switch (next_token.type) {
+                       case css_parser_token::token_type::eof_token:
+                               eof = true;
+                               break;
+                       case css_parser_token::token_type::ident_token:
+                       case css_parser_token::token_type::hash_token:
+                               /* Consume allowed complex tokens as a rule preamble */
+                               ret = component_value_consumer(block); /* NOTE(review): the callee fetches its own next token, so the token matched here is not passed on */
+                               break;
+                       case css_parser_token::token_type::cdo_token:
+                       case css_parser_token::token_type::cdc_token:
+                               if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+                                       /* Ignore */
+                                       ret = true;
+                               }
+                               else { /* NOTE(review): nothing is done for CDO/CDC tokens outside of the top block */
+
+                               }
+                               break;
+                       };
+               } /* Token types not listed in the switch are skipped */
+
+               if (ret) {
+                       if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { /* NOTE(review): for any other kind of `top` the collected block is dropped */
+                               top->attach_block(std::move(block));
+                       }
+               }
+
+               --rec_level; /* Leave this nesting level */
+
+               return ret;
+       };
+
+       /*
+        * Picks the consumer lambda for the current parser state. The deduced
+        * return type is the closure type of qualified_rule_consumer, so this is
+        * the only consumer that can be returned from here.
+        */
+       auto get_parser_consumer = [&]() -> auto {
+               switch (state) {
+               case parser_state::initial_state:
+                       /* Top level qualified parser */
+                       return qualified_rule_consumer;
                        break;
-               default:
-                       /* Ignore tokens */
-                       msg_debug_css("got token: %s", next_token.debug_token_str().c_str());
+               }
+
+               /*
+                * Every path must return a value: flowing off the end of a
+                * value-returning lambda is undefined behaviour, so states that
+                * have no dedicated consumer yet use the top level one as well
+                */
+               return qualified_rule_consumer;
+       };
+
+       while (!eof) {
+               /* Get a token and a consumer lambda for the current parser state */
+
+               auto consumer = get_parser_consumer();
+
+               if (!consumer(consumed_blocks)) {
                        break;
                }
        }
index b2da885006d6b4a58302d07a198020cec3679b9e..7ef5f464380a6fe47b635a496b34463435844fa0 100644 (file)
@@ -90,6 +90,7 @@ struct css_parser_token {
        css_parser_token() = delete;
        explicit css_parser_token(token_type type, const value_type &value) :
                        value(value), type(type) {}
+       css_parser_token(css_parser_token &&other) = default;
        auto adjust_dim(const css_parser_token &dim_token) -> bool;
 
        /* Debugging routines */
index 0a2cbc750e2415b3763dabb17890ab4ad7e29a2f..458469afc74e44934e84ae0bf46161e4d8744349 100644 (file)
@@ -30,6 +30,7 @@ namespace rspamd::css {
 enum class css_parse_error_type {
        PARSE_ERROR_UNKNOWN_OPTION,
        PARSE_ERROR_INVALID_SYNTAX,
+       PARSE_ERROR_BAD_NESTING, /* Set by the parser when its nesting (recursion) limit is exceeded */
        PARSE_ERROR_NYI,
        PARSE_ERROR_UNKNOWN_ERROR,
 };