From: Vsevolod Stakhov Date: Mon, 25 Jan 2021 16:35:23 +0000 (+0000) Subject: [Project] Css: Rework tokens structure X-Git-Tag: 3.0~730 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fcfaab40b8ea772ce9d72773930c329a6277da6d;p=thirdparty%2Frspamd.git [Project] Css: Rework tokens structure --- diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx index 207cfcb9d3..68f03cdfab 100644 --- a/src/libserver/css/css_parser.cxx +++ b/src/libserver/css/css_parser.cxx @@ -94,16 +94,16 @@ bool css_parser::consume_input(const std::string_view &sv) css_tokeniser css_tokeniser(pool, sv); while (!eof) { - auto token_pair = css_tokeniser.next_token(); + auto next_token = css_tokeniser.next_token(); /* Top level parser */ - switch (token_pair.first) { - case css_parser_token::eof_token: + switch (next_token.type) { + case css_parser_token::token_type::eof_token: eof = true; break; - case css_parser_token::whitespace_token: - case css_parser_token::cdc_token: - case css_parser_token::cdo_token: + case css_parser_token::token_type::whitespace_token: + case css_parser_token::token_type::cdc_token: + case css_parser_token::token_type::cdo_token: /* Ignore tokens */ break; } diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx index 40f202b01b..058f7504e7 100644 --- a/src/libserver/css/css_tokeniser.cxx +++ b/src/libserver/css/css_tokeniser.cxx @@ -19,8 +19,46 @@ namespace rspamd::css { +/* Helpers to create tokens */ -auto css_tokeniser::next_token (void) -> std::pair +/* + * This helper is intended to create tokens either with a tag and value + * or with just a tag. + */ +template +auto make_token(const Args&... args) -> css_parser_token; + +template<> +auto make_token(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::string_token, s}; +} + +template<> +auto make_token(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::whitespace_token, s}; +} + +template<> +auto make_token(const char &c) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::delim_token, c}; +} + +/* + * Generic tokens with no value (non-terminals) + */ +template +auto make_token(void) -> css_parser_token +{ + return css_parser_token{T, css_parser_token_placeholder()}; +} + +auto css_tokeniser::next_token(void) -> struct css_parser_token { /* Helpers */ @@ -29,7 +67,7 @@ auto css_tokeniser::next_token (void) -> std::pair std::pair auto { + auto consume_string = [this](auto quote_char) -> auto { auto i = offset; bool need_unescape = false; @@ -122,8 +160,7 @@ auto css_tokeniser::next_token (void) -> std::pair(c); } break; case ' ': @@ -136,48 +173,41 @@ auto css_tokeniser::next_token (void) -> std::pair( + std::string_view(&input[offset], i - offset)); offset = i; return ret; } case '"': case '\'': offset = i + 1; - return std::make_pair (css_parser_token::string_token, - consume_string (c)); + return make_token(consume_string(c)); case '(': offset = i + 1; - return std::make_pair (css_parser_token::obrace_token, - std::string_view (&input[offset - 1], 1)); + return make_token(); case ')': offset = i + 1; - return std::make_pair (css_parser_token::ebrace_token, - std::string_view (&input[offset - 1], 1)); + return make_token(); case ',': - offset = i + 1; - return std::make_pair (css_parser_token::comma_token, - std::string_view (&input[offset - 1], 1)); + return make_token(); case '<': /* Maybe an xml like comment */ if (i + 3 < input.size () && input[i + 1] == '!' && input[i + 2] == '-' && input[i + 3] == '-') { offset += 3; - return std::make_pair (css_parser_token::cdo_token, - std::string_view (&input[offset - 3], 3)); + return make_token(); } else { offset = i + 1; - return std::make_pair (css_parser_token::delim_token, - std::string_view (&input[offset - 1], 1)); + return make_token(c); } break; } } - return std::make_pair (css_parser_token::eof_token, std::string_view ()); + return make_token(); } } \ No newline at end of file diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx index 4c68243897..cff5877c23 100644 --- a/src/libserver/css/css_tokeniser.hxx +++ b/src/libserver/css/css_tokeniser.hxx @@ -21,41 +21,67 @@ #include #include +#include #include "mem_pool.h" namespace rspamd::css { -enum class css_parser_token { - whitespace_token, - ident_token, - function_token, - at_keyword_token, - hash_token, - string_token, - number_token, - url_token, - dimension_token, - percentage_token, - cdo_token, /* xml open comment */ - cdc_token, /* xml close comment */ - delim_token, - obrace_token, /* ( */ - ebrace_token, /* ) */ - osqbrace_token, /* [ */ - esqbrace_token, /* ] */ - comma_token, - colon_token, - semicolon_token, - eof_token, +struct css_parser_token_placeholder {}; /* For empty tokens */ + +struct css_parser_token { + enum class token_type : std::uint8_t { + whitespace_token, + ident_token, + function_token, + at_keyword_token, + hash_token, + string_token, + number_token, + url_token, + dimension_token, + percentage_token, + cdo_token, /* xml open comment */ + cdc_token, /* xml close comment */ + delim_token, + obrace_token, /* ( */ + ebrace_token, /* ) */ + osqbrace_token, /* [ */ + esqbrace_token, /* ] */ + comma_token, + colon_token, + semicolon_token, + eof_token, + }; + + static const std::uint8_t default_flags = 0; + static const std::uint8_t flag_bad_string = (1u << 0u); + using value_type = std::variant; + + /* Typed storage */ + value_type value; + token_type type; + std::uint8_t flags = default_flags; + + css_parser_token() = delete; + explicit css_parser_token(token_type type, const value_type &value) : + value(value), type(type) {} }; +/* Ensure that parser tokens are simple enough */ +static_assert(std::is_trivially_copyable_v); + class css_tokeniser { public: css_tokeniser() = delete; css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) : input(sv), offset(0), pool(pool) {} - auto next_token(void) -> std::pair; + auto next_token(void) -> struct css_parser_token; + auto get_offset(void) const { return offset; } private: std::string_view input; std::size_t offset;