From 5b703a46b4b06dcdc2d1910940cd3ea105ab7e39 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 7 Jun 2021 15:13:55 +0100 Subject: [PATCH] [Rework] Html: Add traverse function --- src/libserver/html/html.cxx | 2 +- src/libserver/html/html.hxx | 46 ++++++++++++++- src/lua/lua_html.cxx | 111 ++++++++++++++---------------------- 3 files changed, 86 insertions(+), 73 deletions(-) diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 40ef240d5d..a459ee0c60 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -108,7 +108,7 @@ html_check_balance(struct html_tag *tag, } /* Misuse */ - return false; + RSPAMD_UNREACHABLE; } static auto diff --git a/src/libserver/html/html.hxx b/src/libserver/html/html.hxx index c3b65a06f5..5624f5d2e7 100644 --- a/src/libserver/html/html.hxx +++ b/src/libserver/html/html.hxx @@ -26,6 +26,7 @@ #include #include +#include "function2/function2.hpp" namespace rspamd::html { @@ -64,12 +65,51 @@ struct html_content { return static_cast(ptr); } -private: - ~html_content() { - g_node_destroy(html_tags); + enum class traverse_type { + PRE_ORDER, + POST_ORDER + }; + auto traverse_tags(fu2::function &&func, + traverse_type how = traverse_type::PRE_ORDER) const -> bool { + + auto rec_functor_pre_order = [&](const html_tag *root, auto &&rec) -> bool { + if (func(root)) { + + for (const auto *c : root->children) { + if (!rec(c, rec)) { + return false; + } + } + + return true; + } + return false; + }; + auto rec_functor_post_order = [&](const html_tag *root, auto &&rec) -> bool { + for (const auto *c : root->children) { + if (!rec(c, rec)) { + return false; + } + } + + return func(root); + }; + + switch(how) { + case traverse_type::PRE_ORDER: + return rec_functor_pre_order(root_tag, rec_functor_pre_order); + case traverse_type::POST_ORDER: + return rec_functor_post_order(root_tag, rec_functor_post_order); + default: + RSPAMD_UNREACHABLE; + } } + +private: + ~html_content() = default; }; + } #endif //RSPAMD_HTML_HXX diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx index 91df192a91..76ca56de5f 100644 --- a/src/lua/lua_html.cxx +++ b/src/lua/lua_html.cxx @@ -191,7 +191,7 @@ lua_check_html (lua_State * L, gint pos) struct lua_html_tag { rspamd::html::html_content *html; - rspamd::html::html_tag *tag; + const rspamd::html::html_tag *tag; }; static struct lua_html_tag * @@ -427,72 +427,23 @@ lua_html_get_blocks (lua_State *L) return 1; } -struct lua_html_traverse_ud { - lua_State *L; - rspamd::html::html_content *html; - gint cbref; - robin_hood::unordered_flat_set tags; - gboolean any; -}; - -static gboolean -lua_html_node_foreach_cb (GNode *n, gpointer d) -{ - struct lua_html_traverse_ud *ud = (struct lua_html_traverse_ud *)d; - auto *tag = (rspamd::html::html_tag *)n->data; - struct lua_html_tag *ltag; - - if (tag && (ud->any || ud->tags.contains(tag->id))) { - - lua_rawgeti (ud->L, LUA_REGISTRYINDEX, ud->cbref); - - ltag = static_cast(lua_newuserdata(ud->L, sizeof(*ltag))); - ltag->tag = tag; - ltag->html = ud->html; - rspamd_lua_setclass (ud->L, "rspamd{html_tag}", -1); - lua_pushinteger (ud->L, tag->content_length); - - /* Leaf flag */ - if (g_node_first_child (n)) { - lua_pushboolean (ud->L, false); - } - else { - lua_pushboolean (ud->L, true); - } - - if (lua_pcall (ud->L, 3, 1, 0) != 0) { - msg_err ("error in foreach_tag callback: %s", lua_tostring (ud->L, -1)); - lua_pop (ud->L, 1); - return TRUE; - } - if (lua_toboolean (ud->L, -1)) { - lua_pop (ud->L, 1); - return TRUE; - } - - lua_pop (ud->L, 1); - } - - return FALSE; -} static gint lua_html_foreach_tag (lua_State *L) { LUA_TRACE_POINT; auto *hc = lua_check_html (L, 1); - struct lua_html_traverse_ud ud; const gchar *tagname; gint id; + auto any = false; + robin_hood::unordered_flat_set tags; - ud.any = FALSE; - ud.html = hc; if (lua_type (L, 2) == LUA_TSTRING) { tagname = luaL_checkstring (L, 2); if (strcmp (tagname, "any") == 0) { - ud.any = TRUE; + any = true; } else { id = rspamd_html_tag_by_name(tagname); @@ -502,7 +453,7 @@ lua_html_foreach_tag (lua_State *L) } - ud.tags.insert(id); + tags.insert(id); } } else if (lua_type (L, 2) == LUA_TTABLE) { @@ -511,7 +462,7 @@ lua_html_foreach_tag (lua_State *L) for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) { tagname = luaL_checkstring (L, -1); if (strcmp (tagname, "any") == 0) { - ud.any = TRUE; + any = TRUE; } else { id = rspamd_html_tag_by_name (tagname); @@ -519,25 +470,48 @@ lua_html_foreach_tag (lua_State *L) if (id == -1) { return luaL_error (L, "invalid tagname: %s", tagname); } - ud.tags.insert(id); + tags.insert(id); } } lua_pop (L, 1); } - if (hc && (ud.any || !ud.tags.empty()) && lua_isfunction (L, 3)) { - if (hc->html_tags) { + if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) { + hc->traverse_tags([&](const rspamd::html::html_tag *tag) -> bool { + if (tag && (any || tags.contains(tag->id))) { + lua_pushvalue(L, 3); - lua_pushvalue (L, 3); - ud.cbref = luaL_ref (L, LUA_REGISTRYINDEX); - ud.L = L; + auto *ltag = static_cast(lua_newuserdata(L, sizeof(lua_html_tag))); + ltag->tag = tag; + ltag->html = hc; + rspamd_lua_setclass (L, "rspamd{html_tag}", -1); + lua_pushinteger (L, tag->content_length); - g_node_traverse (hc->html_tags, G_PRE_ORDER, G_TRAVERSE_ALL, -1, - lua_html_node_foreach_cb, &ud); + /* Leaf flag */ + if (tag->children.empty()) { + lua_pushboolean (L, true); + } + else { + lua_pushboolean (L, false); + } - luaL_unref (L, LUA_REGISTRYINDEX, ud.cbref); - } + if (lua_pcall (L, 3, 1, 0) != 0) { + msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1)); + lua_pop (L, 1); + return false; + } + + if (lua_toboolean (L, -1)) { + lua_pop(L, 1); + return false; + } + + lua_pop(L, 1); + } + + return true; + }); } else { return luaL_error (L, "invalid arguments"); @@ -575,14 +549,13 @@ lua_html_tag_get_parent (lua_State *L) { LUA_TRACE_POINT; struct lua_html_tag *ltag = lua_check_html_tag (L, 1), *ptag; - GNode *node; if (ltag != NULL) { - node = ltag->tag->parent; + auto *parent = ltag->tag->parent; - if (node && node->data) { + if (parent) { ptag = static_cast(lua_newuserdata(L, sizeof(*ptag))); - ptag->tag = static_cast(node->data); + ptag->tag = static_cast(parent); ptag->html = ltag->html; rspamd_lua_setclass (L, "rspamd{html_tag}", -1); } -- 2.47.3