static auto
html_process_url_tag(rspamd_mempool_t *pool,
struct html_tag *tag,
- struct html_content *hc) -> std::optional<struct rspamd_url *>
+ struct html_content *hc,
+ lua_State *L) -> std::optional<struct rspamd_url *>
{
auto found_href_maybe = tag->find_href();
}
}
- auto url = html_process_url(pool, href_value).value_or(nullptr);
+ auto url = html_process_url(pool, href_value, L).value_or(nullptr);
if (url) {
if (tag->id != Tag_A) {
static void
html_process_query_url(rspamd_mempool_t *pool, struct rspamd_url *url,
khash_t(rspamd_url_hash) * url_set,
- GPtrArray *part_urls)
+ GPtrArray *part_urls,
+ lua_State *L)
{
if (url->querylen > 0) {
struct rspamd_html_url_query_cbd qcbd;
rspamd_url_find_multiple(pool,
rspamd_url_query_unsafe(url), url->querylen,
RSPAMD_URL_FIND_ALL, NULL,
- html_url_query_callback, &qcbd, NULL);
+ html_url_query_callback, &qcbd, L);
}
if (part_urls) {
if (img->src) {
std::string_view cpy{*href_value};
- auto maybe_url = html_process_url(pool, cpy);
+ auto maybe_url = html_process_url(pool, cpy, L);
if (maybe_url) {
img->url = maybe_url.value();
const struct html_tag *cur_tag,
GList **exceptions,
khash_t(rspamd_url_hash) * url_set,
- goffset dest_offset) -> void
+ goffset dest_offset,
+ lua_State *L) -> void
{
if (std::holds_alternative<rspamd_url *>(cur_tag->extra)) {
exceptions, url_set,
data,
dest_offset,
- url);
+ url, L);
}
}
html_process_displayed_href_tag(pool, hc,
{hc->parsed.data() + initial_parsed_offset, std::size_t(written_len)},
tag, exceptions,
- url_set, initial_parsed_offset);
+ url_set, initial_parsed_offset,
+ task->cfg ? task->cfg->lua_state : NULL);
/* Count display URL mismatches when URL is present */
if (std::holds_alternative<rspamd_url *>(tag->extra)) {
auto *u = std::get<rspamd_url *>(tag->extra);
/* If action present and absolute, compare eTLD+1 with first-party */
if (auto href = cur_tag->find_href()) {
if (html_is_absolute_url(*href)) {
- auto maybe_url = html_process_url(pool, *href);
+ auto maybe_url = html_process_url(pool, *href,
+ task->cfg ? task->cfg->lua_state : NULL);
if (maybe_url) {
struct rspamd_url *u = maybe_url.value();
if (u->hostlen > 0) {
if (!urlv.empty()) {
/* validate and count; do not add to urls set */
- auto maybe_url = html_process_url(pool, urlv);
+ auto maybe_url = html_process_url(pool, urlv,
+ task->cfg ? task->cfg->lua_state : NULL);
if (maybe_url) {
hc->features.meta_refresh_urls++;
}
}
if (cur_tag->flags & FL_HREF && html_document_state == html_document_state::body) {
- auto maybe_url = html_process_url_tag(pool, cur_tag, hc);
+ auto maybe_url = html_process_url_tag(pool, cur_tag, hc,
+ task->cfg ? task->cfg->lua_state : NULL);
if (maybe_url.has_value()) {
url = maybe_url.value();
}
url->part_order = cur_url_part_order++;
html_process_query_url(pool, url, url_set,
- part_urls);
+ part_urls,
+ task->cfg ? task->cfg->lua_state : NULL);
}
else {
url = maybe_existing;
/*
* Base is allowed only within the head tag, but real-world HTML often violates this
*/
- auto maybe_url = html_process_url_tag(pool, cur_tag, hc);
+ auto maybe_url = html_process_url_tag(pool, cur_tag, hc,
+ task->cfg ? task->cfg->lua_state : NULL);
if (maybe_url) {
msg_debug_html("got valid base tag");
text_url = rspamd_mempool_alloc0_type(pool, struct rspamd_url);
auto rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
- RSPAMD_URL_PARSE_TEXT, NULL);
+ RSPAMD_URL_PARSE_TEXT, L);
if (rc == URI_ERRNO_OK) {
text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
void *url_set,
std::string_view visible_part,
goffset href_offset,
- struct rspamd_url *url)
+ struct rspamd_url *url,
+ lua_State *L)
{
struct rspamd_url *displayed_url = nullptr;
struct rspamd_url *turl;
rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
}
-auto html_process_url(rspamd_mempool_t *pool, std::string_view &input)
+auto html_process_url(rspamd_mempool_t *pool, std::string_view &input, lua_State *L)
-> std::optional<struct rspamd_url *>
{
struct rspamd_url *url;
url = rspamd_mempool_alloc0_type(pool, struct rspamd_url);
rspamd_url_normalise_propagate_flags(pool, decoded, &dlen, saved_flags);
- rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF, NULL);
+ rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF, L);
/* Filter some completely damaged urls */
if (rc == URI_ERRNO_OK && url->hostlen > 0 &&