css_tokeniser css_tokeniser(pool, sv);
while (!eof) {
- auto token_pair = css_tokeniser.next_token();
+ auto next_token = css_tokeniser.next_token();
/* Top level parser */
- switch (token_pair.first) {
- case css_parser_token::eof_token:
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
eof = true;
break;
- case css_parser_token::whitespace_token:
- case css_parser_token::cdc_token:
- case css_parser_token::cdo_token:
+ case css_parser_token::token_type::whitespace_token:
+ case css_parser_token::token_type::cdc_token:
+ case css_parser_token::token_type::cdo_token:
/* Ignore tokens */
break;
}
namespace rspamd::css {
+/* Helpers to create tokens */
-auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string_view>
+/*
+ * This helper is intended to create tokens either with a tag and value
+ * or with just a tag.
+ */
+template<css_parser_token::token_type T, typename ...Args>
+css_parser_token make_token(const Args&... args);
+
+/* String token: the payload is a view of the string contents */
+template<>
+css_parser_token
+make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &contents)
+{
+	return css_parser_token(css_parser_token::token_type::string_token, contents);
+}
+
+/* Whitespace token: the payload is a view of the whitespace run */
+template<>
+css_parser_token
+make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &spaces)
+{
+	return css_parser_token(css_parser_token::token_type::whitespace_token, spaces);
+}
+
+/* Delimiter token: the payload is the delimiter character itself */
+template<>
+css_parser_token
+make_token<css_parser_token::token_type::delim_token, char>(const char &delim)
+{
+	return css_parser_token(css_parser_token::token_type::delim_token, delim);
+}
+
+/*
+ * Tokens that carry no value: only the tag is meaningful, the value slot
+ * is filled with the placeholder type
+ */
+template<css_parser_token::token_type T>
+css_parser_token make_token()
+{
+	return css_parser_token(T, css_parser_token_placeholder{});
+}
+
+auto css_tokeniser::next_token(void) -> struct css_parser_token
{
/* Helpers */
* offset is set to the next character after a comment (or eof)
* Nothing is returned
*/
- auto consume_comment = [this] () {
+ auto consume_comment = [this]() {
auto i = offset;
auto nested = 0;
* is set one character after the string. Css unescaping is done automatically
* Accepts a quote char to find end of string
*/
- auto consume_string = [this] (auto quote_char) -> auto {
+ auto consume_string = [this](auto quote_char) -> auto {
auto i = offset;
bool need_unescape = false;
}
else {
offset = i + 1;
- return std::make_pair (css_parser_token::delim_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::delim_token>(c);
}
break;
case ' ':
c = input[++i];
} while (i < input.size () && g_ascii_isspace (c));
- auto ret = std::make_pair (css_parser_token::whitespace_token,
- std::string_view (&input[offset], i - offset));
+ auto ret = make_token<css_parser_token::token_type::whitespace_token>(
+ std::string_view(&input[offset], i - offset));
offset = i;
return ret;
}
case '"':
case '\'':
offset = i + 1;
- return std::make_pair (css_parser_token::string_token,
- consume_string (c));
+ return make_token<css_parser_token::token_type::string_token>(consume_string(c));
case '(':
offset = i + 1;
- return std::make_pair (css_parser_token::obrace_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::obrace_token>();
case ')':
offset = i + 1;
- return std::make_pair (css_parser_token::ebrace_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::ebrace_token>();
case ',':
+ /* Consume the comma, as the other single character tokens do */
offset = i + 1;
- return std::make_pair (css_parser_token::comma_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::comma_token>();
case '<':
/* Maybe an xml like comment */
if (i + 3 < input.size () && input[i + 1] == '!'
&& input[i + 2] == '-' && input[i + 3] == '-') {
- offset += 3;
- return std::make_pair (css_parser_token::cdo_token,
- std::string_view (&input[offset - 3], 3));
+ /* "<!--" is four characters long, so step over all of them */
+ offset = i + 4;
+ return make_token<css_parser_token::token_type::cdo_token>();
}
else {
/* A lone '<' is an ordinary delimiter */
offset = i + 1;
- return std::make_pair (css_parser_token::delim_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::delim_token>(c);
}
break;
}
}
- return std::make_pair (css_parser_token::eof_token, std::string_view ());
+ return make_token<css_parser_token::token_type::eof_token>();
}
}
\ No newline at end of file
+#include <cstdint>
#include <string_view>
+#include <type_traits>
#include <utility>
+#include <variant>
#include "mem_pool.h"
namespace rspamd::css {
-enum class css_parser_token {
- whitespace_token,
- ident_token,
- function_token,
- at_keyword_token,
- hash_token,
- string_token,
- number_token,
- url_token,
- dimension_token,
- percentage_token,
- cdo_token, /* xml open comment */
- cdc_token, /* xml close comment */
- delim_token,
- obrace_token, /* ( */
- ebrace_token, /* ) */
- osqbrace_token, /* [ */
- esqbrace_token, /* ] */
- comma_token,
- colon_token,
- semicolon_token,
- eof_token,
+/* Empty type stored as the value of tokens that carry no payload */
+struct css_parser_token_placeholder {};
+
+/*
+ * A single token produced by the tokeniser: the token kind, a typed value
+ * and a byte of flags.
+ */
+struct css_parser_token {
+	enum class token_type : std::uint8_t {
+		whitespace_token,
+		ident_token,
+		function_token,
+		at_keyword_token,
+		hash_token,
+		string_token,
+		number_token,
+		url_token,
+		dimension_token,
+		percentage_token,
+		cdo_token, /* xml open comment */
+		cdc_token, /* xml close comment */
+		delim_token,
+		obrace_token, /* ( */
+		ebrace_token, /* ) */
+		osqbrace_token, /* [ */
+		esqbrace_token, /* ] */
+		comma_token,
+		colon_token,
+		semicolon_token,
+		eof_token,
+	};
+
+	/*
+	 * constexpr static members are implicitly inline in C++17, so they can be
+	 * odr-used (e.g. bound to a const reference) without a separate
+	 * out-of-class definition.
+	 */
+	static constexpr std::uint8_t default_flags = 0;
+	static constexpr std::uint8_t flag_bad_string = (1u << 0u);
+
+	using value_type = std::variant<std::string_view, /* For strings and string like tokens */
+			char, /* For delimiters (might need to move to unicode point) */
+			double, /* For numeric stuff */
+			css_parser_token_placeholder /* For general no token stuff */
+	>;
+
+	/* Typed storage */
+	value_type value;
+	token_type type;
+	std::uint8_t flags = default_flags;
+
+	/* A token always has an explicit type and value; flags start at default_flags */
+	css_parser_token() = delete;
+	explicit css_parser_token(token_type type, const value_type &value) :
+			value(value), type(type) {}
+};
+/* Ensure that parser tokens are simple enough */
+static_assert(std::is_trivially_copyable_v<css_parser_token>);
+
class css_tokeniser {
public:
css_tokeniser() = delete;
css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) :
input(sv), offset(0), pool(pool) {}
- auto next_token(void) -> std::pair<css_parser_token, std::string_view>;
+ auto next_token(void) -> struct css_parser_token;
+ auto get_offset(void) const { return offset; }
private:
std::string_view input;
std::size_t offset;