git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] html: Add HTML5 tag definitions
author Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 16 Apr 2026 18:38:36 +0000 (19:38 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 16 Apr 2026 18:42:24 +0000 (19:42 +0100)
Add 32 HTML5 tags used in modern email (sectioning, media, text-level,
interactive, forms, web components). Notably adds video/audio/source/
track/picture/svg recognition so their URLs and structure are visible
to the parser. Also fixes a latent bug where Tag_KEYGEN existed in the
enum but was missing from the defs array.

New tag IDs are appended after Tag_NEXTID so existing tag IDs remain
stable.

src/libserver/html/html_tag_defs.hxx
src/libserver/html/html_tags.h

index 05a5e3cde901a387334b1830aaa121718e5cce2a..88b084cd89e7d970be15db68d18e62ad3d449d3d 100644 (file)
@@ -139,7 +139,42 @@ static const auto html_tag_defs_array = rspamd::array_of(
        TAG_DEF(Tag_UL, "ul", (CM_BLOCK | FL_BLOCK)),
        TAG_DEF(Tag_VAR, "var", (CM_INLINE)),
        TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK)),
-       TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)));
+       TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)),
+
+       /* HTML5 additions */
+       TAG_DEF(Tag_ARTICLE, "article", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_ASIDE, "aside", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_AUDIO, "audio", (CM_INLINE | FL_HREF)),
+       TAG_DEF(Tag_BDI, "bdi", (CM_INLINE)),
+       TAG_DEF(Tag_CANVAS, "canvas", (CM_INLINE)),
+       TAG_DEF(Tag_DATA, "data", (CM_INLINE)),
+       TAG_DEF(Tag_DATALIST, "datalist", (CM_INLINE)),
+       TAG_DEF(Tag_DETAILS, "details", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_DIALOG, "dialog", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_EMBED, "embed", (CM_INLINE | CM_EMPTY | FL_HREF)),
+       TAG_DEF(Tag_FIGCAPTION, "figcaption", (CM_BLOCK)),
+       TAG_DEF(Tag_FIGURE, "figure", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_FOOTER, "footer", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_HEADER, "header", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_HGROUP, "hgroup", (CM_BLOCK)),
+       TAG_DEF(Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)),
+       TAG_DEF(Tag_MAIN, "main", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_MARK, "mark", (CM_INLINE)),
+       TAG_DEF(Tag_METER, "meter", (CM_INLINE)),
+       TAG_DEF(Tag_NAV, "nav", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_OUTPUT, "output", (CM_INLINE)),
+       TAG_DEF(Tag_PICTURE, "picture", (CM_INLINE)),
+       TAG_DEF(Tag_PROGRESS, "progress", (CM_INLINE)),
+       TAG_DEF(Tag_SECTION, "section", (CM_BLOCK | FL_BLOCK)),
+       TAG_DEF(Tag_SLOT, "slot", (CM_INLINE)),
+       TAG_DEF(Tag_SOURCE, "source", (CM_INLINE | CM_EMPTY | FL_HREF)),
+       TAG_DEF(Tag_SUMMARY, "summary", (CM_BLOCK)),
+       TAG_DEF(Tag_SVG, "svg", (CM_INLINE)),
+       TAG_DEF(Tag_TEMPLATE, "template", (CM_INLINE | CM_RAW)),
+       TAG_DEF(Tag_TIME, "time", (CM_INLINE)),
+       TAG_DEF(Tag_TRACK, "track", (CM_INLINE | CM_EMPTY | FL_HREF)),
+       TAG_DEF(Tag_VIDEO, "video", (CM_INLINE | FL_HREF)),
+       TAG_DEF(Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)));
 
 class html_tags_storage {
        ankerl::unordered_dense::map<std::string_view, html_tag_def> tag_by_name;
index c1863149464e40aa31d4857390799ff076b68c17..b5b971bb4459bbf6fcc9128828684ba7f6b0757f 100644 (file)
@@ -125,6 +125,41 @@ typedef enum {
        Tag_VAR,         /**< VAR */
        Tag_XMP,         /**< XMP */
        Tag_NEXTID,      /**< NEXTID */
+
+       /* HTML5 additions */
+       Tag_ARTICLE,    /**< ARTICLE */
+       Tag_ASIDE,      /**< ASIDE */
+       Tag_AUDIO,      /**< AUDIO */
+       Tag_BDI,        /**< BDI */
+       Tag_CANVAS,     /**< CANVAS */
+       Tag_DATA,       /**< DATA */
+       Tag_DATALIST,   /**< DATALIST */
+       Tag_DETAILS,    /**< DETAILS */
+       Tag_DIALOG,     /**< DIALOG */
+       Tag_EMBED,      /**< EMBED */
+       Tag_FIGCAPTION, /**< FIGCAPTION */
+       Tag_FIGURE,     /**< FIGURE */
+       Tag_FOOTER,     /**< FOOTER */
+       Tag_HEADER,     /**< HEADER */
+       Tag_HGROUP,     /**< HGROUP */
+       Tag_MAIN,       /**< MAIN */
+       Tag_MARK,       /**< MARK */
+       Tag_METER,      /**< METER */
+       Tag_NAV,        /**< NAV */
+       Tag_OUTPUT,     /**< OUTPUT */
+       Tag_PICTURE,    /**< PICTURE */
+       Tag_PROGRESS,   /**< PROGRESS */
+       Tag_SECTION,    /**< SECTION */
+       Tag_SLOT,       /**< SLOT */
+       Tag_SOURCE,     /**< SOURCE */
+       Tag_SUMMARY,    /**< SUMMARY */
+       Tag_SVG,        /**< SVG */
+       Tag_TEMPLATE,   /**< TEMPLATE */
+       Tag_TIME,       /**< TIME */
+       Tag_TRACK,      /**< TRACK */
+       Tag_VIDEO,      /**< VIDEO */
+       Tag_WBR,        /**< WBR */
+
        Tag_MAX,
 
        N_TAGS = -1 /**< Must be -1 */