ret = TRUE;
}
}
+ else if (tag->id == Tag_IMG) {
+ /* Check width and height if present */
+ if (len == 5 && g_ascii_strncasecmp (begin, "width", len) == 0) {
+ comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+ comp->type = RSPAMD_HTML_COMPONENT_WIDTH;
+ comp->start = NULL;
+ comp->len = 0;
+ tag->params = g_list_prepend (tag->params, comp);
+ ret = TRUE;
+ }
+ else if (len == 5 && g_ascii_strncasecmp (begin, "height", len) == 0) {
+ comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+ comp->type = RSPAMD_HTML_COMPONENT_HEIGHT;
+ comp->start = NULL;
+ comp->len = 0;
+ tag->params = g_list_prepend (tag->params, comp);
+ ret = TRUE;
+ }
+ }
return ret;
}
return NULL;
}
+/**
+ * Build an html_image descriptor from the parsed attributes of an image tag
+ * and append it to hc->images.
+ *
+ * The descriptor is zero-initialised, so height, width, flags and src keep
+ * their zero/NULL values when the corresponding attribute is missing or
+ * cannot be parsed.
+ *
+ * @param pool memory pool used for the descriptor, for the copy of the source
+ *             string and for registering the destructor of the images array
+ * @param tag  tag whose params list (of struct html_tag_component) is scanned
+ * @param hc   HTML content collecting images; hc->images is created lazily
+ */
+static void
+rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
+		struct html_content *hc)
+{
+	struct html_tag_component *comp;
+	struct html_image *img;
+	rspamd_fstring_t fstr;
+	GList *cur;
+	gulong val;
+
+	img = rspamd_mempool_alloc0 (pool, sizeof (*img));
+
+	/*
+	 * Advance the cursor on every iteration: without stepping to the next
+	 * node the loop never terminates once the tag has a parameter.
+	 */
+	for (cur = tag->params; cur != NULL; cur = cur->next) {
+		comp = cur->data;
+
+		if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+			fstr.begin = (gchar *)comp->start;
+			fstr.len = comp->len;
+			img->src = rspamd_mempool_fstrdup (pool, &fstr);
+
+			if (comp->len > sizeof ("cid:") - 1 && memcmp (comp->start,
+					"cid:", sizeof ("cid:") - 1) == 0) {
+				/* We have an embedded image */
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+			}
+			else {
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+			/*
+			 * Components are created with start == NULL and len == 0, so
+			 * skip the conversion when no value has been attached.
+			 */
+			if (comp->start != NULL && comp->len > 0 &&
+					rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->height = val;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+			if (comp->start != NULL && comp->len > 0 &&
+					rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->width = val;
+			}
+		}
+	}
+
+	if (hc->images == NULL) {
+		/* First image for this content: create the array, freed with the pool */
+		hc->images = g_ptr_array_sized_new (4);
+		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+				hc->images);
+	}
+
+	g_ptr_array_add (hc->images, img);
+}
+
+
GByteArray*
rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails)
target_tbl = urls;
}
- turl = g_hash_table_lookup (target_tbl, url);
-
- if (turl != NULL && turl->phished_url == NULL) {
- g_hash_table_insert (target_tbl, url, url);
- }
- else if (turl == NULL) {
- g_hash_table_insert (target_tbl, url, url);
- }
- else {
- url = NULL;
+ if (target_tbl != NULL) {
+ turl = g_hash_table_lookup (target_tbl, url);
+
+ if (turl != NULL && turl->phished_url == NULL) {
+ g_hash_table_insert (target_tbl, url, url);
+ }
+ else if (turl == NULL) {
+ g_hash_table_insert (target_tbl, url, url);
+ }
+ else {
+ url = NULL;
+ }
}
href_offset = dest->len;
url = NULL;
}
}
+ else if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+ rspamd_html_process_img_tag (pool, cur_tag, hc);
+ }
}
else {
state = content_write;
#include "config.h"
#include "mem_pool.h"
+/*
+ * HTML content flags
+ */
#define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
#define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
#define RSPAMD_HTML_FLAG_XML (1 << 2)
#define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
+/*
+ * Image flags
+ */
+#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
+#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)
+
enum html_component_type {
RSPAMD_HTML_COMPONENT_NAME = 0,
RSPAMD_HTML_COMPONENT_HREF,
guint len;
};
+struct html_image { /* One image tag, as filled in by rspamd_html_process_img_tag */
+ guint height; /* Numeric value of the height component when it parses as a number */
+ guint width; /* Numeric value of the width component when it parses as a number */
+ guint flags; /* RSPAMD_HTML_FLAG_IMAGE_* bits (embedded "cid:" source vs external) */
+ gchar *src; /* Memory-pool copy of the image source taken from the HREF component */
+};
+
struct html_tag {
gint id;
struct html_tag_component name;
GNode *html_tags;
gint flags;
guchar *tags_seen;
+ GPtrArray *images;
};
/*