*/
#include "css_selector.hxx"
+#include "fmt/core.h"
namespace rspamd::css {
-> selectors_vec
{
selectors_vec ret;
+ bool can_continue = true;
+ enum class selector_process_state {
+ selector_parse_start = 0,
+ selector_expect_ident,
+ selector_ident_consumed,
+ selector_ignore_attribute,
+ selector_ignore_function,
+ selector_ignore_combination
+ } state = selector_process_state::selector_parse_start;
+ std::unique_ptr<css_selector> cur_selector;
+
+
+ while (can_continue) {
+ const auto &next_tok = next_token_functor();
+
+ if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (state == selector_process_state::selector_parse_start) {
+ /*
+ * At the beginning of the parsing we can expect either
+ * delim or an ident, everything else is discarded for now
+ */
+ msg_debug_css("start consume selector");
+
+ switch (parser_tok.type) {
+ case css_parser_token::token_type::delim_token: {
+ auto delim_c = parser_tok.get_delim();
+
+ if (delim_c == '.') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_CLASS);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '#') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '*') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ALL);
+ state = selector_process_state::selector_ident_consumed;
+ }
+ break;
+ }
+ case css_parser_token::token_type::ident_token:
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ELEMENT);
+ cur_selector->value = parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ case css_parser_token::token_type::hash_token:
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ cur_selector->value =
+ parser_tok.get_string_or_default("").substr(1);
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ default:
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %*s; expected start",
+ next_tok.token_type_str());
+ can_continue = false;
+ break;
+ }
+ }
+ else if (state == selector_process_state::selector_expect_ident) {
+ /*
+ * We got something like a selector start, so we expect
+ * a plain ident
+ */
+ if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
+ cur_selector->value = parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ }
+ else {
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %*s; expected ident",
+ next_tok.token_type_str());
+ can_continue = false;
+ }
+ }
+ else if (state == selector_process_state::selector_ident_consumed) {
+ if (parser_tok.type == css_parser_token::token_type::comma_token) {
+ /* Got full selector, attach it to the vector and go further */
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ /* TODO: implement adjustments */
+ state = selector_process_state::selector_ignore_function;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
+ /* TODO: implement attributes checks */
+ state = selector_process_state::selector_ignore_attribute;
+ }
+ else {
+ /* TODO: implement selectors combinations */
+ state = selector_process_state::selector_ignore_combination;
+ }
+ }
+ else {
+ /* Ignore state; ignore all till ',' token or eof token */
+ if (parser_tok.type == css_parser_token::token_type::comma_token) {
+ /* Got full selector, attach it to the vector and go further */
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else {
+ auto debug_str = parser_tok.get_string_or_default("");
+ msg_debug_css("ignore token %*s", (int)debug_str.size(),
+ debug_str.data());
+ }
+ }
+ }
+ else {
+ /* End of parsing */
+ if (state == selector_process_state::selector_ident_consumed && cur_selector) {
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ }
+ can_continue = false;
+ }
+
+ }
return ret; /* copy elision */
}
+auto
+css_selector::debug_str() const -> std::string
+{
+ std::string ret;
+
+ if (type == selector_type::SELECTOR_ID) {
+ ret += "#";
+ }
+ else if (type == selector_type::SELECTOR_CLASS) {
+ ret += ".";
+ }
+ else if (type == selector_type::SELECTOR_ALL) {
+ ret = "*";
+
+ return ret;
+ }
+
+ std::visit([&](auto arg) -> void {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, tag_id_t>) {
+ ret += fmt::format("tag: {}", static_cast<int>(arg));
+ }
+ else {
+ ret += arg;
+ }
+ }, value);
+
+ return ret;
+}
+
}
*/
struct css_selector {
enum class selector_type {
- SELECTOR_ELEMENT, /* e.g. .tr, for this value we use tag_id_t */
- SELECTOR_CLASS, /* generic class */
- SELECTOR_ID /* e.g. #id */
+ SELECTOR_ELEMENT, /* e.g. tr, for this value we use tag_id_t */
+ SELECTOR_CLASS, /* generic class, e.g. .class */
+ SELECTOR_ID, /* e.g. #id */
+ SELECTOR_ALL /* * selector */
};
selector_type type;
- std::variant<tag_id_t, std::string> value;
+ std::variant<tag_id_t, std::string_view> value;
+
+ /* Conditions for the css selector */
+ /* Dependency on attributes */
+ struct css_attribute_condition {
+ std::string_view attribute;
+ std::string_view op = "";
+ std::string_view value = "";
+ };
+
+ /* General dependency chain */
+ using css_selector_ptr = std::unique_ptr<css_selector>;
+ using css_selector_dep = std::variant<css_attribute_condition, css_selector_ptr>;
+ std::vector<css_selector_dep> dependencies;
auto to_tag(void) const -> std::optional<tag_id_t> {
if (type == selector_type::SELECTOR_ELEMENT) {
auto to_string(void) const -> std::optional<const std::string_view> {
if (type == selector_type::SELECTOR_ELEMENT) {
- return std::string_view(std::get<std::string>(value));
+ return std::string_view(std::get<std::string_view>(value));
}
return std::nullopt;
};
+
+ explicit css_selector(selector_type t) : type(t) {}
+
+ auto debug_str(void) const -> std::string;
};
+
using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
/*