From: Vsevolod Stakhov
Date: Mon, 3 Oct 2022 22:16:33 +0000 (+0100)
Subject: [Fix] Fix emoji joiner FP
X-Git-Tag: 3.4~75
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e5328bd63e30aba25e20fb94a21927a5eef61e50;p=thirdparty%2Frspamd.git

[Fix] Fix emoji joiner FP

Issue: #4290
---

diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
index 8c727e9ad6..0e7cd39d7d 100644
--- a/src/libutil/cxx/utf8_util.cxx
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -85,8 +85,10 @@ rspamd_normalise_unicode_inplace(char *start, size_t *len)
 	if (!zw_spaces.isFrozen()) {
 		/* Add zw spaces to the set */
 		zw_spaces.add(0x200B);
+		/* TODO: ZW non joiner, it might be used for ligatures, so it should possibly be excluded as well */
 		zw_spaces.add(0x200C);
-		zw_spaces.add(0x200D);
+		/* See github issue #4290 for explanation. It seems that the ZWJ has many legit use cases */
+		//zw_spaces.add(0x200D);
 		zw_spaces.add(0xFEF);
 		zw_spaces.add(0x00AD);
 		zw_spaces.freeze();