From: Vsevolod Stakhov Date: Thu, 16 Apr 2026 18:38:36 +0000 (+0100) Subject: [Feature] html: Add HTML5 tag definitions X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=36b1ade596622346d4e12dad5e7c98e1a783bf05;p=thirdparty%2Frspamd.git [Feature] html: Add HTML5 tag definitions Add 32 HTML5 tags used in modern email (sectioning, media, text-level, interactive, forms, web components). Notably adds video/audio/source/ track/picture/svg recognition so their URLs and structure are visible to the parser. Also fixes latent bug where Tag_KEYGEN existed in the enum but was missing from the defs array. New tag IDs are appended after Tag_NEXTID so existing tag IDs remain stable. --- diff --git a/src/libserver/html/html_tag_defs.hxx b/src/libserver/html/html_tag_defs.hxx index 05a5e3cde9..88b084cd89 100644 --- a/src/libserver/html/html_tag_defs.hxx +++ b/src/libserver/html/html_tag_defs.hxx @@ -139,7 +139,42 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_UL, "ul", (CM_BLOCK | FL_BLOCK)), TAG_DEF(Tag_VAR, "var", (CM_INLINE)), TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK)), - TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY))); + TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)), + + /* HTML5 additions */ + TAG_DEF(Tag_ARTICLE, "article", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_ASIDE, "aside", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_AUDIO, "audio", (CM_INLINE | FL_HREF)), + TAG_DEF(Tag_BDI, "bdi", (CM_INLINE)), + TAG_DEF(Tag_CANVAS, "canvas", (CM_INLINE)), + TAG_DEF(Tag_DATA, "data", (CM_INLINE)), + TAG_DEF(Tag_DATALIST, "datalist", (CM_INLINE)), + TAG_DEF(Tag_DETAILS, "details", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_DIALOG, "dialog", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_EMBED, "embed", (CM_INLINE | CM_EMPTY | FL_HREF)), + TAG_DEF(Tag_FIGCAPTION, "figcaption", (CM_BLOCK)), + TAG_DEF(Tag_FIGURE, "figure", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_FOOTER, "footer", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_HEADER, "header", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_HGROUP, "hgroup", (CM_BLOCK)), + TAG_DEF(Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)), + TAG_DEF(Tag_MAIN, "main", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_MARK, "mark", (CM_INLINE)), + TAG_DEF(Tag_METER, "meter", (CM_INLINE)), + TAG_DEF(Tag_NAV, "nav", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_OUTPUT, "output", (CM_INLINE)), + TAG_DEF(Tag_PICTURE, "picture", (CM_INLINE)), + TAG_DEF(Tag_PROGRESS, "progress", (CM_INLINE)), + TAG_DEF(Tag_SECTION, "section", (CM_BLOCK | FL_BLOCK)), + TAG_DEF(Tag_SLOT, "slot", (CM_INLINE)), + TAG_DEF(Tag_SOURCE, "source", (CM_INLINE | CM_EMPTY | FL_HREF)), + TAG_DEF(Tag_SUMMARY, "summary", (CM_BLOCK)), + TAG_DEF(Tag_SVG, "svg", (CM_INLINE)), + TAG_DEF(Tag_TEMPLATE, "template", (CM_INLINE | CM_RAW)), + TAG_DEF(Tag_TIME, "time", (CM_INLINE)), + TAG_DEF(Tag_TRACK, "track", (CM_INLINE | CM_EMPTY | FL_HREF)), + TAG_DEF(Tag_VIDEO, "video", (CM_INLINE | FL_HREF)), + TAG_DEF(Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY))); class html_tags_storage { ankerl::unordered_dense::map tag_by_name; diff --git a/src/libserver/html/html_tags.h b/src/libserver/html/html_tags.h index c186314946..b5b971bb44 100644 --- a/src/libserver/html/html_tags.h +++ b/src/libserver/html/html_tags.h @@ -125,6 +125,41 @@ typedef enum { Tag_VAR, /**< VAR */ Tag_XMP, /**< XMP */ Tag_NEXTID, /**< NEXTID */ + + /* HTML5 additions */ + Tag_ARTICLE, /**< ARTICLE */ + Tag_ASIDE, /**< ASIDE */ + Tag_AUDIO, /**< AUDIO */ + Tag_BDI, /**< BDI */ + Tag_CANVAS, /**< CANVAS */ + Tag_DATA, /**< DATA */ + Tag_DATALIST, /**< DATALIST */ + Tag_DETAILS, /**< DETAILS */ + Tag_DIALOG, /**< DIALOG */ + Tag_EMBED, /**< EMBED */ + Tag_FIGCAPTION, /**< FIGCAPTION */ + Tag_FIGURE, /**< FIGURE */ + Tag_FOOTER, /**< FOOTER */ + Tag_HEADER, /**< HEADER */ + Tag_HGROUP, /**< HGROUP */ + Tag_MAIN, /**< MAIN */ + Tag_MARK, /**< MARK */ + Tag_METER, /**< METER */ + Tag_NAV, /**< NAV */ + Tag_OUTPUT, /**< OUTPUT */ + Tag_PICTURE, /**< PICTURE */ + Tag_PROGRESS, /**< PROGRESS */ + Tag_SECTION, /**< SECTION */ + Tag_SLOT, /**< SLOT */ + Tag_SOURCE, /**< SOURCE */ + Tag_SUMMARY, /**< SUMMARY */ + Tag_SVG, /**< SVG */ + Tag_TEMPLATE, /**< TEMPLATE */ + Tag_TIME, /**< TIME */ + Tag_TRACK, /**< TRACK */ + Tag_VIDEO, /**< VIDEO */ + Tag_WBR, /**< WBR */ + Tag_MAX, N_TAGS = -1 /**< Must be -1 */