git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Css: Rework tokens structure
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 25 Jan 2021 16:35:23 +0000 (16:35 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 25 Jan 2021 16:35:38 +0000 (16:35 +0000)
src/libserver/css/css_parser.cxx
src/libserver/css/css_tokeniser.cxx
src/libserver/css/css_tokeniser.hxx

diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 207cfcb9d3c72f7fe4db0f3d86fb3ea6d48ee970..68f03cdfab278c0fc04bea8a1ac5a5278625c287 100644 (file)
@@ -94,16 +94,16 @@ bool css_parser::consume_input(const std::string_view &sv)
        css_tokeniser css_tokeniser(pool, sv);
 
        while (!eof) {
-               auto token_pair = css_tokeniser.next_token();
+               auto next_token = css_tokeniser.next_token();
 
                /* Top level parser */
-               switch (token_pair.first) {
-               case css_parser_token::eof_token:
+               switch (next_token.type) {
+               case css_parser_token::token_type::eof_token:
                        eof = true;
                        break;
-               case css_parser_token::whitespace_token:
-               case css_parser_token::cdc_token:
-               case css_parser_token::cdo_token:
+               case css_parser_token::token_type::whitespace_token:
+               case css_parser_token::token_type::cdc_token:
+               case css_parser_token::token_type::cdo_token:
                        /* Ignore tokens */
                        break;
                }
diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx
index 40f202b01b64771bee787092b33b7038ebbe512d..058f7504e76bbe98bec05ddcfb46b005de62761c 100644 (file)
 
 namespace rspamd::css {
 
+/* Helpers to create tokens */
 
-auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string_view>
+/*
+ * This helper is intended to create tokens either with a tag and value
+ * or with just a tag.
+ */
+template<css_parser_token::token_type T, typename ...Args>
+auto make_token(const Args&... args) -> css_parser_token;
+
+template<>
+auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s)
+        -> css_parser_token
+{
+       return css_parser_token{css_parser_token::token_type::string_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s)
+        -> css_parser_token
+{
+       return css_parser_token{css_parser_token::token_type::whitespace_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::delim_token, char>(const char &c)
+        -> css_parser_token
+{
+       return css_parser_token{css_parser_token::token_type::delim_token, c};
+}
+
+/*
+ * Generic tokens with no value (non-terminals)
+ */
+template<css_parser_token::token_type T>
+auto make_token(void) -> css_parser_token
+{
+       return css_parser_token{T, css_parser_token_placeholder()};
+}
+
+auto css_tokeniser::next_token(void) -> struct css_parser_token
 {
        /* Helpers */
 
@@ -29,7 +67,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
         * offset is set to the next character after a comment (or eof)
         * Nothing is returned
         */
-       auto consume_comment = [this] () {
+       auto consume_comment = [this]() {
                auto i = offset;
                auto nested = 0;
 
@@ -64,7 +102,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
         * is set one character after the string. Css unescaping is done automatically
         * Accepts a quote char to find end of string
         */
-       auto consume_string = [this] (auto quote_char) -> auto {
+       auto consume_string = [this](auto quote_char) -> auto {
                auto i = offset;
                bool need_unescape = false;
 
@@ -122,8 +160,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
                        }
                        else {
                                offset = i + 1;
-                               return std::make_pair (css_parser_token::delim_token,
-                                               std::string_view (&input[offset - 1], 1));
+                               return make_token<css_parser_token::token_type::delim_token>(c);
                        }
                        break;
                case ' ':
@@ -136,48 +173,41 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
                                c = input[++i];
                        } while (i < input.size () && g_ascii_isspace (c));
 
-                       auto ret = std::make_pair (css_parser_token::whitespace_token,
-                                       std::string_view (&input[offset], i - offset));
+                       auto ret = make_token<css_parser_token::token_type::whitespace_token>(
+                                       std::string_view(&input[offset], i - offset));
                        offset = i;
                        return ret;
                }
                case '"':
                case '\'':
                        offset = i + 1;
-                       return std::make_pair (css_parser_token::string_token,
-                                       consume_string (c));
+                       return make_token<css_parser_token::token_type::string_token>(consume_string(c));
                case '(':
                        offset = i + 1;
-                       return std::make_pair (css_parser_token::obrace_token,
-                                       std::string_view (&input[offset - 1], 1));
+                       return make_token<css_parser_token::token_type::obrace_token>();
                case ')':
                        offset = i + 1;
-                       return std::make_pair (css_parser_token::ebrace_token,
-                                       std::string_view (&input[offset - 1], 1));
+                       return make_token<css_parser_token::token_type::ebrace_token>();
                case ',':
-                       offset = i + 1;
-                       return std::make_pair (css_parser_token::comma_token,
-                                       std::string_view (&input[offset - 1], 1));
+                       return make_token<css_parser_token::token_type::comma_token>();
                case '<':
                        /* Maybe an xml like comment */
                        if (i + 3 < input.size () && input[i + 1] == '!'
                                && input[i + 2] == '-' && input[i + 3] == '-') {
                                offset += 3;
 
-                               return std::make_pair (css_parser_token::cdo_token,
-                                               std::string_view (&input[offset - 3], 3));
+                               return make_token<css_parser_token::token_type::cdo_token>();
                        }
                        else {
                                offset = i + 1;
-                               return std::make_pair (css_parser_token::delim_token,
-                                               std::string_view (&input[offset - 1], 1));
+                               return make_token<css_parser_token::token_type::delim_token>(c);
                        }
                        break;
                }
 
        }
 
-       return std::make_pair (css_parser_token::eof_token, std::string_view ());
+       return make_token<css_parser_token::token_type::eof_token>();
 }
 
 }
\ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index 4c68243897b7e50ca418808682bc5c337c50c7ca..cff5877c23eb9b598da06a2e91592708e37c8b26 100644 (file)
 
 #include <string_view>
 #include <utility>
+#include <variant>
 #include "mem_pool.h"
 
 namespace rspamd::css {
 
-enum class css_parser_token {
-       whitespace_token,
-       ident_token,
-       function_token,
-       at_keyword_token,
-       hash_token,
-       string_token,
-       number_token,
-       url_token,
-       dimension_token,
-       percentage_token,
-       cdo_token, /* xml open comment */
-       cdc_token, /* xml close comment */
-       delim_token,
-       obrace_token, /* ( */
-       ebrace_token, /* ) */
-       osqbrace_token, /* [ */
-       esqbrace_token, /* ] */
-       comma_token,
-       colon_token,
-       semicolon_token,
-       eof_token,
+struct css_parser_token_placeholder {}; /* For empty tokens */
+
+struct css_parser_token {
+       enum class token_type : std::uint8_t {
+               whitespace_token,
+               ident_token,
+               function_token,
+               at_keyword_token,
+               hash_token,
+               string_token,
+               number_token,
+               url_token,
+               dimension_token,
+               percentage_token,
+               cdo_token, /* xml open comment */
+               cdc_token, /* xml close comment */
+               delim_token,
+               obrace_token, /* ( */
+               ebrace_token, /* ) */
+               osqbrace_token, /* [ */
+               esqbrace_token, /* ] */
+               comma_token,
+               colon_token,
+               semicolon_token,
+               eof_token,
+       };
+
+       static const std::uint8_t default_flags = 0;
+       static const std::uint8_t flag_bad_string = (1u << 0u);
+       using value_type = std::variant<std::string_view, /* For strings and string like tokens */
+                       char, /* For delimiters (might need to move to unicode point) */
+                       double, /* For numeric stuff */
+                       css_parser_token_placeholder /* For general no token stuff */
+       >;
+
+       /* Typed storage */
+       value_type value;
+       token_type type;
+       std::uint8_t flags = default_flags;
+
+       css_parser_token() = delete;
+       explicit css_parser_token(token_type type, const value_type &value) :
+                       value(value), type(type) {}
 };
 
+/* Ensure that parser tokens are simple enough */
+static_assert(std::is_trivially_copyable_v<css_parser_token>);
+
 class css_tokeniser {
 public:
        css_tokeniser() = delete;
        css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) :
                        input(sv), offset(0), pool(pool) {}
 
-       auto next_token(void) -> std::pair<css_parser_token, std::string_view>;
+       auto next_token(void) -> struct css_parser_token;
+       auto get_offset(void) const { return offset; }
 private:
        std::string_view input;
        std::size_t offset;