--- /dev/null
+/*--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation. You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_tokenizer.l author Oleksandr Serhiienko <oserhiie@cisco.com>
+*/
+
+/* Scanner options: yyclass makes yylex() a member of the JSTokenizer class; */
+/* noyywrap tells Flex that no yywrap() is needed at end of input */
+%option yyclass="JSTokenizer" noyywrap
+
+%{
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif
+
+ #include "utils/js_tokenizer.h"
+%}
+
+/* The following grammar was created based on ECMAScript specification */
+/* source https://ecma-international.org/ecma-262/5.1/ */
+
+/* whitespaces */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
+/* code points above U+007F are spelled as their UTF-8 byte sequences */
+TAB \x9
+VT \xB
+FF \xC
+SP \x20
+/* U+00A0 is encoded as C2 A0 in UTF-8; the bare A0 byte is still accepted so previously matched input keeps matching */
+NBSP \xC2\xA0|\xA0
+BOM \xEF\xBB\xBF
+WHITESPACES {TAB}|{VT}|{FF}|{SP}|{NBSP}|{BOM}
+
+/* control characters named by the single character escape sequences */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 */
+NUL \x00
+BS \x08
+HT \x09
+CHAR_ESCAPE_SEQUENCES {NUL}|{BS}|{HT}
+
+/* line terminators; LS (U+2028) and PS (U+2029) are written as UTF-8 byte sequences */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.3 */
+LF \x0A
+CR \x0D
+LS \xE2\x80\xA8
+PS \xE2\x80\xA9
+LINE_TERMINATORS {LF}|{CR}|{LS}|{PS}
+
+/* comment openers */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.4 */
+SINGLE_LINE_COMMENT "//"
+MULTI_LINE_COMMENT "/*"
+
+/* the "use strict" directive in either quoting style, followed by any number of semicolons */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-14.1 */
+USE_STRICT_DIRECTIVE ("\"use strict\""|"'use strict'");*
+
+/* keywords and future reserved words, listed alphabetically */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6.1.1 */
+KEYWORD break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|function|if|implements|import|in|instanceof|interface|let|new|package|private|protected|public|return|static|super|switch|this|throw|try|typeof|var|void|while|with|yield
+
+/* punctuators */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.7 */
+/* PUNCTUATOR is ordered as: brackets, separators, comparisons, shifts, bitwise/logical, assignments */
+CLOSING_BRACES [)\]]
+PUNCTUATOR "{"|"}"|"("|"["|"."|";"|","|"?"|":"|"<"|">"|"<="|">="|"=="|"!="|"==="|"!=="|"<<"|">>"|">>>"|"&"|"|"|"^"|"!"|"~"|"&&"|"||"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^="
+OPERATOR "++"|"--"|[-+*%]
+/* the division forms are kept as separate definitions */
+DIV_OPERATOR "/"
+DIV_ASSIGNMENT_OPERATOR "/="
+
+/* Unicode letter ranges (categories Lu, Ll, Lt, Lm, Lo and Nl) */
+/* generated with unicode_range_generator.l */
+/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
+/* the script above converts Unicode code point ranges into regex ranges over UTF-8 bytes, since Flex doesn't support Unicode */
+/* for example, the Unicode range from U+00D1 to U+00D6 will look like this: \xC3[\x91-\x96] */
+/* because each character in this range is encoded as two UTF-8 bytes: \xC3 followed by one byte from the range [\x91-\x96] */
+/* using this trick it's possible to handle Unicode character ranges within Flex regular expressions */
+/* i.e. the idea is to represent each Unicode character as its UTF-8 byte sequence */
+LETTER_RNG_1 [A-Z]
+LETTER_RNG_2 [a-z]
+LETTER_RNG_3 \xC2\xAA
+LETTER_RNG_4 \xC2\xB5
+LETTER_RNG_5 \xC2\xBA
+LETTER_RNG_6 \xC3[\x80-\x96]
+LETTER_RNG_7 \xC3[\x98-\xB6]
+LETTER_RNG_8 \xC3[\xB8-\xBF]|\xCB[\x80-\x81]|[\xC4-\xCA][\x80-\xBF]
+LETTER_RNG_9 \xCB[\x86-\x91]
+LETTER_RNG_10 \xCB[\xA0-\xA4]
+LETTER_RNG_11 \xCB\xAC
+LETTER_RNG_12 \xCB\xAE
+LETTER_RNG_13 \xCD[\xB0-\xB4]
+LETTER_RNG_14 \xCD[\xB6-\xBD]
+LETTER_RNG_15 \xCD\xBF
+LETTER_RNG_16 \xCE\x86
+LETTER_RNG_17 \xCE[\x88-\xBF]|\xCF[\x80-\xB5]
+LETTER_RNG_18 \xCF[\xB7-\xBF]|\xD2[\x80-\x81]|[\xD0-\xD1][\x80-\xBF]
+LETTER_RNG_19 \xD2[\x8A-\xBF]|\xD5[\x80-\x99]|[\xD3-\xD4][\x80-\xBF]
+LETTER_RNG_20 \xD5[\xA0-\xBF]|\xD6[\x80-\x88]
+LETTER_RNG_21 \xD7[\x90-\xB2]
+LETTER_RNG_22 \xD8[\xA0-\xBF]|\xD9[\x80-\x8A]
+LETTER_RNG_23 \xD9[\xAE-\xAF]
+LETTER_RNG_24 \xD9[\xB1-\xBF]|\xDB[\x80-\x93]|\xDA[\x80-\xBF]
+LETTER_RNG_25 \xDB\x95
+LETTER_RNG_26 \xDB[\xA5-\xA6]
+LETTER_RNG_27 \xDB[\xAE-\xAF]
+LETTER_RNG_28 \xDB[\xBA-\xBC]
+LETTER_RNG_29 \xDB\xBF
+LETTER_RNG_30 \xDC\x90
+LETTER_RNG_31 \xDC[\x92-\xAF]
+LETTER_RNG_32 \xDD[\x8D-\xBF]|\xDE[\x80-\xA5]
+LETTER_RNG_33 \xDE\xB1
+LETTER_RNG_34 \xDF[\x8A-\xAA]
+LETTER_RNG_35 \xDF[\xB4-\xB5]
+LETTER_RNG_36 \xDF\xBA
+LETTER_RNG_37 \xE0\xA0[\x80-\x95]
+LETTER_RNG_38 \xE0\xA0\x9A
+LETTER_RNG_39 \xE0\xA0\xA4
+LETTER_RNG_40 \xE0\xA0\xA8
+LETTER_RNG_41 \xE0\xA1[\x80-\x98]
+LETTER_RNG_42 \xE0(\xA1[\xA0-\xBF]|\xA3[\x80-\x87]|\xA2[\x80-\xBF])
+LETTER_RNG_43 \xE0\xA4[\x84-\xB9]
+LETTER_RNG_44 \xE0\xA4\xBD
+LETTER_RNG_45 \xE0\xA5\x90
+LETTER_RNG_46 \xE0\xA5[\x98-\xA1]
+LETTER_RNG_47 \xE0(\xA5[\xB1-\xBF]|\xA6\x80)
+LETTER_RNG_48 \xE0\xA6[\x85-\xB9]
+LETTER_RNG_49 \xE0\xA6\xBD
+LETTER_RNG_50 \xE0\xA7\x8E
+LETTER_RNG_51 \xE0\xA7[\x9C-\xA1]
+LETTER_RNG_52 \xE0\xA7[\xB0-\xB1]
+LETTER_RNG_53 \xE0\xA7\xBC
+LETTER_RNG_54 \xE0\xA8[\x85-\xB9]
+LETTER_RNG_55 \xE0\xA9[\x99-\x9E]
+LETTER_RNG_56 \xE0\xA9[\xB2-\xB4]
+LETTER_RNG_57 \xE0\xAA[\x85-\xB9]
+LETTER_RNG_58 \xE0\xAA\xBD
+LETTER_RNG_59 \xE0\xAB[\x90-\xA1]
+LETTER_RNG_60 \xE0\xAB\xB9
+LETTER_RNG_61 \xE0\xAC[\x85-\xB9]
+LETTER_RNG_62 \xE0\xAC\xBD
+LETTER_RNG_63 \xE0\xAD[\x9C-\xA1]
+LETTER_RNG_64 \xE0\xAD\xB1
+LETTER_RNG_65 \xE0\xAE[\x83-\xB9]
+LETTER_RNG_66 \xE0\xAF\x90
+LETTER_RNG_67 \xE0\xB0[\x85-\xBD]
+LETTER_RNG_68 \xE0\xB1[\x98-\xA1]
+LETTER_RNG_69 \xE0\xB2\x80
+LETTER_RNG_70 \xE0\xB2[\x85-\xB9]
+LETTER_RNG_71 \xE0\xB2\xBD
+LETTER_RNG_72 \xE0\xB3[\x9E-\xA1]
+LETTER_RNG_73 \xE0\xB3[\xB1-\xB2]
+LETTER_RNG_74 \xE0\xB4[\x84-\xBA]
+LETTER_RNG_75 \xE0\xB4\xBD
+LETTER_RNG_76 \xE0\xB5\x8E
+LETTER_RNG_77 \xE0\xB5[\x94-\x96]
+LETTER_RNG_78 \xE0\xB5[\x9F-\xA1]
+LETTER_RNG_79 \xE0\xB5[\xBA-\xBF]
+LETTER_RNG_80 \xE0(\xB6[\x85-\xBF]|\xB7[\x80-\x86])
+LETTER_RNG_81 \xE0\xB8[\x81-\xB0]
+LETTER_RNG_82 \xE0\xB8[\xB2-\xB3]
+LETTER_RNG_83 \xE0\xB9[\x80-\x86]
+LETTER_RNG_84 \xE0\xBA[\x81-\xB0]
+LETTER_RNG_85 \xE0\xBA[\xB2-\xB3]
+LETTER_RNG_86 \xE0(\xBA[\xBD-\xBF]|\xBB[\x80-\x86])
+LETTER_RNG_87 \xE0(\xBB[\x9C-\xBF]|\xBC\x80)
+LETTER_RNG_88 \xE0\xBD[\x80-\xAC]
+LETTER_RNG_89 \xE0\xBE[\x88-\x8C]
+LETTER_RNG_90 \xE1\x80[\x80-\xAA]
+LETTER_RNG_91 \xE1\x80\xBF
+LETTER_RNG_92 \xE1\x81[\x90-\x95]
+LETTER_RNG_93 \xE1\x81[\x9A-\x9D]
+LETTER_RNG_94 \xE1\x81\xA1
+LETTER_RNG_95 \xE1\x81[\xA5-\xA6]
+LETTER_RNG_96 \xE1\x81[\xAE-\xB0]
+LETTER_RNG_97 \xE1(\x81[\xB5-\xBF]|\x82[\x80-\x81])
+LETTER_RNG_98 \xE1\x82\x8E
+LETTER_RNG_99 \xE1(\x82[\xA0-\xBF]|\x83[\x80-\xBA])
+LETTER_RNG_100 \xE1(\x83[\xBC-\xBF]|\x8D[\x80-\x9A]|[\x84-\x8C][\x80-\xBF])
+LETTER_RNG_101 \xE1\x8E[\x80-\x8F]
+LETTER_RNG_102 \xE1(\x8E[\xA0-\xBF]|\x8F[\x80-\xBD])
+LETTER_RNG_103 \xE1(\x90[\x81-\xBF]|\x99[\x80-\xAC]|[\x91-\x98][\x80-\xBF])
+LETTER_RNG_104 \xE1\x99[\xAF-\xBF]
+LETTER_RNG_105 \xE1\x9A[\x81-\x9A]
+LETTER_RNG_106 \xE1(\x9A[\xA0-\xBF]|\x9B[\x80-\xAA])
+LETTER_RNG_107 \xE1(\x9B[\xAE-\xBF]|\x9C[\x80-\x91])
+LETTER_RNG_108 \xE1\x9C[\xA0-\xB1]
+LETTER_RNG_109 \xE1\x9D[\x80-\x91]
+LETTER_RNG_110 \xE1\x9D[\xA0-\xB0]
+LETTER_RNG_111 \xE1\x9E[\x80-\xB3]
+LETTER_RNG_112 \xE1\x9F\x97
+LETTER_RNG_113 \xE1\x9F\x9C
+LETTER_RNG_114 \xE1(\xA0[\xA0-\xBF]|\xA2[\x80-\x84]|\xA1[\x80-\xBF])
+LETTER_RNG_115 \xE1\xA2[\x87-\xA8]
+LETTER_RNG_116 \xE1(\xA2[\xAA-\xBF]|\xA4[\x80-\x9E]|\xA3[\x80-\xBF])
+LETTER_RNG_117 \xE1(\xA5[\x90-\xBF]|\xA7[\x80-\x89]|\xA6[\x80-\xBF])
+LETTER_RNG_118 \xE1\xA8[\x80-\x96]
+LETTER_RNG_119 \xE1(\xA8[\xA0-\xBF]|\xA9[\x80-\x94])
+LETTER_RNG_120 \xE1\xAA\xA7
+LETTER_RNG_121 \xE1\xAC[\x85-\xB3]
+LETTER_RNG_122 \xE1\xAD[\x85-\x8B]
+LETTER_RNG_123 \xE1\xAE[\x83-\xA0]
+LETTER_RNG_124 \xE1\xAE[\xAE-\xAF]
+LETTER_RNG_125 \xE1(\xAE[\xBA-\xBF]|\xAF[\x80-\xA5])
+LETTER_RNG_126 \xE1\xB0[\x80-\xA3]
+LETTER_RNG_127 \xE1\xB1[\x8D-\x8F]
+LETTER_RNG_128 \xE1\xB1[\x9A-\xBD]
+LETTER_RNG_129 \xE1\xB2[\x80-\xBF]
+LETTER_RNG_130 \xE1\xB3[\xA9-\xAC]
+LETTER_RNG_131 \xE1\xB3[\xAE-\xB3]
+LETTER_RNG_132 \xE1\xB3[\xB5-\xB6]
+LETTER_RNG_133 \xE1(\xB3[\xBA-\xBF]|[\xB4-\xB6][\x80-\xBF])
+LETTER_RNG_134 \xE1(\xBE[\x80-\xBC]|[\xB8-\xBD][\x80-\xBF])
+LETTER_RNG_135 \xE1\xBE\xBE
+LETTER_RNG_136 \xE1\xBF[\x82-\x8C]
+LETTER_RNG_137 \xE1\xBF[\x90-\x9B]
+LETTER_RNG_138 \xE1\xBF[\xA0-\xAC]
+LETTER_RNG_139 \xE1\xBF[\xB2-\xBC]
+LETTER_RNG_140 \xE2\x81\xB1
+LETTER_RNG_141 \xE2\x81\xBF
+LETTER_RNG_142 \xE2\x82[\x90-\x9C]
+LETTER_RNG_143 \xE2\x84\x82
+LETTER_RNG_144 \xE2\x84\x87
+LETTER_RNG_145 \xE2\x84[\x8A-\x93]
+LETTER_RNG_146 \xE2\x84\x95
+LETTER_RNG_147 \xE2\x84[\x99-\x9D]
+LETTER_RNG_148 \xE2\x84\xA4
+LETTER_RNG_149 \xE2\x84\xA6
+LETTER_RNG_150 \xE2\x84\xA8
+LETTER_RNG_151 \xE2\x84[\xAA-\xAD]
+LETTER_RNG_152 \xE2\x84[\xAF-\xB9]
+LETTER_RNG_153 \xE2\x84[\xBC-\xBF]
+LETTER_RNG_154 \xE2\x85[\x85-\x89]
+LETTER_RNG_155 \xE2\x85\x8E
+LETTER_RNG_156 \xE2(\x85[\xA0-\xBF]|\x86[\x80-\x88])
+LETTER_RNG_157 \xE2(\xB3[\x80-\xA4]|[\xB0-\xB2][\x80-\xBF])
+LETTER_RNG_158 \xE2\xB3[\xAB-\xAE]
+LETTER_RNG_159 \xE2\xB3[\xB2-\xB3]
+LETTER_RNG_160 \xE2(\xB5[\x80-\xAF]|\xB4[\x80-\xBF])
+LETTER_RNG_161 \xE2(\xB7[\x80-\x9E]|\xB6[\x80-\xBF])
+LETTER_RNG_162 \xE2\xB8\xAF
+LETTER_RNG_163 \xE3\x80[\x85-\x87]
+LETTER_RNG_164 \xE3\x80[\xA1-\xA9]
+LETTER_RNG_165 \xE3\x80[\xB1-\xB5]
+LETTER_RNG_166 \xE3\x80[\xB8-\xBC]
+LETTER_RNG_167 \xE3(\x81[\x81-\xBF]|\x82[\x80-\x96])
+LETTER_RNG_168 \xE3\x82[\x9D-\x9F]
+LETTER_RNG_169 \xE3(\x82[\xA1-\xBF]|\x83[\x80-\xBA])
+LETTER_RNG_170 \xE3(\x83[\xBC-\xBF]|\x86[\x80-\x8E]|[\x84-\x85][\x80-\xBF])
+LETTER_RNG_171 \xE3\x86[\xA0-\xBF]
+LETTER_RNG_172 \xE3\x87[\xB0-\xBF]
+LETTER_RNG_173 (\xE3[\x90-\xBF]|\xE4[\x80-\xB6])[\x80-\xBF]
+LETTER_RNG_174 \xEA\x92[\x80-\x8C]|(\xE4[\xB8-\xBF]|\xEA[\x80-\x91]|[\xE5-\xE9][\x80-\xBF])[\x80-\xBF]
+LETTER_RNG_175 \xEA\x93[\x90-\xBD]
+LETTER_RNG_176 \xEA(\x98[\x80-\x8C]|[\x94-\x97][\x80-\xBF])
+LETTER_RNG_177 \xEA\x98[\x90-\x9F]
+LETTER_RNG_178 \xEA(\x98[\xAA-\xBF]|\x99[\x80-\xAE])
+LETTER_RNG_179 \xEA(\x99\xBF|\x9A[\x80-\x9D])
+LETTER_RNG_180 \xEA(\x9A[\xA0-\xBF]|\x9B[\x80-\xAF])
+LETTER_RNG_181 \xEA\x9C[\x97-\x9F]
+LETTER_RNG_182 \xEA(\x9C[\xA2-\xBF]|\x9E[\x80-\x88]|\x9D[\x80-\xBF])
+LETTER_RNG_183 \xEA(\x9E[\x8B-\xBF]|\xA0[\x80-\x81]|\x9F[\x80-\xBF])
+LETTER_RNG_184 \xEA\xA0[\x83-\x85]
+LETTER_RNG_185 \xEA\xA0[\x87-\x8A]
+LETTER_RNG_186 \xEA\xA0[\x8C-\xA2]
+LETTER_RNG_187 \xEA\xA1[\x80-\xB3]
+LETTER_RNG_188 \xEA\xA2[\x82-\xB3]
+LETTER_RNG_189 \xEA\xA3[\xB2-\xB7]
+LETTER_RNG_190 \xEA\xA3\xBB
+LETTER_RNG_191 \xEA\xA3[\xBD-\xBE]
+LETTER_RNG_192 \xEA\xA4[\x8A-\xA5]
+LETTER_RNG_193 \xEA(\xA4[\xB0-\xBF]|\xA5[\x80-\x86])
+LETTER_RNG_194 \xEA\xA5[\xA0-\xBC]
+LETTER_RNG_195 \xEA\xA6[\x84-\xB2]
+LETTER_RNG_196 \xEA\xA7\x8F
+LETTER_RNG_197 \xEA\xA7[\xA0-\xA4]
+LETTER_RNG_198 \xEA\xA7[\xA6-\xAF]
+LETTER_RNG_199 \xEA(\xA7[\xBA-\xBF]|\xA8[\x80-\xA8])
+LETTER_RNG_200 \xEA\xA9[\x80-\x82]
+LETTER_RNG_201 \xEA\xA9[\x84-\x8B]
+LETTER_RNG_202 \xEA\xA9[\xA0-\xB6]
+LETTER_RNG_203 \xEA\xA9\xBA
+LETTER_RNG_204 \xEA(\xA9[\xBE-\xBF]|\xAA[\x80-\xAF])
+LETTER_RNG_205 \xEA\xAA\xB1
+LETTER_RNG_206 \xEA\xAA[\xB5-\xB6]
+LETTER_RNG_207 \xEA\xAA[\xB9-\xBD]
+LETTER_RNG_208 \xEA\xAB\x80
+LETTER_RNG_209 \xEA\xAB[\x82-\x9D]
+LETTER_RNG_210 \xEA\xAB[\xA0-\xAA]
+LETTER_RNG_211 \xEA\xAB[\xB2-\xB4]
+LETTER_RNG_212 \xEA(\xAC[\x81-\xBF]|\xAD[\x80-\x9A])
+LETTER_RNG_213 \xEA\xAD[\x9C-\xA9]
+LETTER_RNG_214 \xEA(\xAD[\xB0-\xBF]|\xAF[\x80-\xA2]|\xAE[\x80-\xBF])
+LETTER_RNG_215 \xED\x9F[\x80-\xBB]|(\xEA[\xB0-\xBF]|\xED[\x80-\x9E]|[\xEB-\xEC][\x80-\xBF])[\x80-\xBF]
+LETTER_RNG_216 \xEF(\xAC[\x80-\x9D]|[\xA4-\xAB][\x80-\xBF])
+LETTER_RNG_217 \xEF\xAC[\x9F-\xA8]
+LETTER_RNG_218 \xEF(\xAC[\xAA-\xBF]|\xAE[\x80-\xB1]|\xAD[\x80-\xBF])
+LETTER_RNG_219 \xEF(\xAF[\x93-\xBF]|\xB4[\x80-\xBD]|[\xB0-\xB3][\x80-\xBF])
+LETTER_RNG_220 \xEF(\xB5[\x90-\xBF]|\xB7[\x80-\xBB]|\xB6[\x80-\xBF])
+LETTER_RNG_221 \xEF(\xB9[\xB0-\xBF]|\xBB[\x80-\xBC]|\xBA[\x80-\xBF])
+LETTER_RNG_222 \xEF\xBC[\xA1-\xBA]
+LETTER_RNG_223 \xEF\xBD[\x81-\x9A]
+LETTER_RNG_224 \xEF(\xBD[\xA6-\xBF]|\xBF[\x80-\x9C]|\xBE[\x80-\xBF])
+LETTER_RNG_225 \xF0\x90(\x83[\x80-\xBA]|[\x80-\x82][\x80-\xBF])
+LETTER_RNG_226 \xF0\x90\x85[\x80-\xB4]
+LETTER_RNG_227 \xF0\x90(\x8B[\x80-\x90]|\x8A[\x80-\xBF])
+LETTER_RNG_228 \xF0\x90\x8C[\x80-\x9F]
+LETTER_RNG_229 \xF0\x90(\x8C[\xAD-\xBF]|\x8D[\x80-\xB5])
+LETTER_RNG_230 \xF0\x90\x8E[\x80-\x9D]
+LETTER_RNG_231 \xF0\x90(\x8E[\xA0-\xBF]|\x8F[\x80-\x8F])
+LETTER_RNG_232 \xF0\x90(\x8F[\x91-\xBF]|\x92[\x80-\x9D]|[\x90-\x91][\x80-\xBF])
+LETTER_RNG_233 \xF0\x90(\x92[\xB0-\xBF]|\x95[\x80-\xA3]|[\x93-\x94][\x80-\xBF])
+LETTER_RNG_234 \xF0\x90(\xA1[\x80-\x95]|[\x98-\xA0][\x80-\xBF])
+LETTER_RNG_235 \xF0\x90\xA1[\xA0-\xB6]
+LETTER_RNG_236 \xF0\x90\xA2[\x80-\x9E]
+LETTER_RNG_237 \xF0\x90\xA3[\xA0-\xB5]
+LETTER_RNG_238 \xF0\x90\xA4[\x80-\x95]
+LETTER_RNG_239 \xF0\x90\xA4[\xA0-\xB9]
+LETTER_RNG_240 \xF0\x90\xA6[\x80-\xB7]
+LETTER_RNG_241 \xF0\x90\xA6[\xBE-\xBF]
+LETTER_RNG_242 \xF0\x90\xA8\x80
+LETTER_RNG_243 \xF0\x90\xA8[\x90-\xB5]
+LETTER_RNG_244 \xF0\x90\xA9[\xA0-\xBC]
+LETTER_RNG_245 \xF0\x90\xAA[\x80-\x9C]
+LETTER_RNG_246 \xF0\x90\xAB[\x80-\x87]
+LETTER_RNG_247 \xF0\x90\xAB[\x89-\xA4]
+LETTER_RNG_248 \xF0\x90\xAC[\x80-\xB5]
+LETTER_RNG_249 \xF0\x90\xAD[\x80-\x95]
+LETTER_RNG_250 \xF0\x90\xAD[\xA0-\xB2]
+LETTER_RNG_251 \xF0\x90\xAE[\x80-\x91]
+LETTER_RNG_252 \xF0\x90(\xB3[\x80-\xB2]|[\xB0-\xB2][\x80-\xBF])
+LETTER_RNG_253 \xF0\x90\xB4[\x80-\xA3]
+LETTER_RNG_254 \xF0\x90\xBA[\x80-\xA9]
+LETTER_RNG_255 \xF0\x90(\xBA[\xB0-\xBF]|\xBC[\x80-\x9C]|\xBB[\x80-\xBF])
+LETTER_RNG_256 \xF0\x90(\xBC[\xA7-\xBF]|\xBD[\x80-\x85])
+LETTER_RNG_257 \xF0\x90(\xBE[\xB0-\xBF]|\xBF[\x80-\x84])
+LETTER_RNG_258 \xF0\x90\xBF[\xA0-\xB6]
+LETTER_RNG_259 \xF0\x91\x80[\x83-\xB7]
+LETTER_RNG_260 \xF0\x91\x82[\x83-\xAF]
+LETTER_RNG_261 \xF0\x91\x83[\x90-\xA8]
+LETTER_RNG_262 \xF0\x91\x84[\x83-\xA6]
+LETTER_RNG_263 \xF0\x91\x85\x84
+LETTER_RNG_264 \xF0\x91\x85[\x87-\xB2]
+LETTER_RNG_265 \xF0\x91\x85\xB6
+LETTER_RNG_266 \xF0\x91\x86[\x83-\xB2]
+LETTER_RNG_267 \xF0\x91\x87[\x81-\x84]
+LETTER_RNG_268 \xF0\x91\x87\x9A
+LETTER_RNG_269 \xF0\x91\x87\x9C
+LETTER_RNG_270 \xF0\x91\x88[\x80-\xAB]
+LETTER_RNG_271 \xF0\x91\x8A[\x80-\xA8]
+LETTER_RNG_272 \xF0\x91(\x8A[\xB0-\xBF]|\x8B[\x80-\x9E])
+LETTER_RNG_273 \xF0\x91\x8C[\x85-\xB9]
+LETTER_RNG_274 \xF0\x91\x8C\xBD
+LETTER_RNG_275 \xF0\x91\x8D\x90
+LETTER_RNG_276 \xF0\x91\x8D[\x9D-\xA1]
+LETTER_RNG_277 \xF0\x91\x90[\x80-\xB4]
+LETTER_RNG_278 \xF0\x91\x91[\x87-\x8A]
+LETTER_RNG_279 \xF0\x91(\x91[\x9F-\xBF]|\x92[\x80-\xAF])
+LETTER_RNG_280 \xF0\x91\x93[\x84-\x85]
+LETTER_RNG_281 \xF0\x91\x93\x87
+LETTER_RNG_282 \xF0\x91\x96[\x80-\xAE]
+LETTER_RNG_283 \xF0\x91\x97[\x98-\x9B]
+LETTER_RNG_284 \xF0\x91\x98[\x80-\xAF]
+LETTER_RNG_285 \xF0\x91\x99\x84
+LETTER_RNG_286 \xF0\x91\x9A[\x80-\xAA]
+LETTER_RNG_287 \xF0\x91\x9A\xB8
+LETTER_RNG_288 \xF0\x91\x9C[\x80-\x9A]
+LETTER_RNG_289 \xF0\x91\xA0[\x80-\xAB]
+LETTER_RNG_290 \xF0\x91(\xA2[\xA0-\xBF]|\xA3[\x80-\x9F])
+LETTER_RNG_291 \xF0\x91(\xA3\xBF|\xA4[\x80-\xAF])
+LETTER_RNG_292 \xF0\x91\xA4\xBF
+LETTER_RNG_293 \xF0\x91\xA5\x81
+LETTER_RNG_294 \xF0\x91(\xA6[\xA0-\xBF]|\xA7[\x80-\x90])
+LETTER_RNG_295 \xF0\x91\xA7\xA1
+LETTER_RNG_296 \xF0\x91\xA7\xA3
+LETTER_RNG_297 \xF0\x91\xA8\x80
+LETTER_RNG_298 \xF0\x91\xA8[\x8B-\xB2]
+LETTER_RNG_299 \xF0\x91\xA8\xBA
+LETTER_RNG_300 \xF0\x91\xA9\x90
+LETTER_RNG_301 \xF0\x91(\xA9[\x9C-\xBF]|\xAA[\x80-\x89])
+LETTER_RNG_302 \xF0\x91\xAA\x9D
+LETTER_RNG_303 \xF0\x91(\xB0[\x80-\xAE]|[\xAB-\xAF][\x80-\xBF])
+LETTER_RNG_304 \xF0\x91\xB1\x80
+LETTER_RNG_305 \xF0\x91(\xB1[\xB2-\xBF]|\xB2[\x80-\x8F])
+LETTER_RNG_306 \xF0\x91\xB4[\x80-\xB0]
+LETTER_RNG_307 \xF0\x91\xB5\x86
+LETTER_RNG_308 \xF0\x91(\xB5[\xA0-\xBF]|\xB6[\x80-\x89])
+LETTER_RNG_309 \xF0\x91\xB6\x98
+LETTER_RNG_310 \xF0\x91\xBB[\xA0-\xB2]
+LETTER_RNG_311 \xF0\x91\xBE\xB0
+LETTER_RNG_312 \xF0\x92(\x91[\x80-\xAE]|[\x80-\x90][\x80-\xBF])
+LETTER_RNG_313 \xF0(\x93\x90[\x80-\xAE]|(\x92[\x92-\xBF]|\x93[\x80-\x8F])[\x80-\xBF])
+LETTER_RNG_314 \xF0(\x96\xA9[\x80-\x9E]|(\x94[\x90-\xBF]|\x96[\x80-\xA8]|\x95[\x80-\xBF])[\x80-\xBF])
+LETTER_RNG_315 \xF0\x96\xAB[\x90-\xAD]
+LETTER_RNG_316 \xF0\x96\xAC[\x80-\xAF]
+LETTER_RNG_317 \xF0\x96\xAD[\x80-\x83]
+LETTER_RNG_318 \xF0\x96(\xAD[\xA3-\xBF]|[\xAE-\xB9][\x80-\xBF])
+LETTER_RNG_319 \xF0\x96(\xBD[\x80-\x8A]|\xBC[\x80-\xBF])
+LETTER_RNG_320 \xF0\x96\xBD\x90
+LETTER_RNG_321 \xF0\x96(\xBE[\x93-\xBF]|\xBF[\x80-\xA1])
+LETTER_RNG_322 \xF0\x96\xBF\xA3
+LETTER_RNG_323 \xF0(\x9B\xB2[\x80-\x99]|(\x9B[\x80-\xB1]|[\x97-\x9A][\x80-\xBF])[\x80-\xBF])
+LETTER_RNG_324 \xF0\x9D(\x9B\x80|[\x90-\x9A][\x80-\xBF])
+LETTER_RNG_325 \xF0\x9D\x9B[\x82-\x9A]
+LETTER_RNG_326 \xF0\x9D\x9B[\x9C-\xBA]
+LETTER_RNG_327 \xF0\x9D(\x9B[\xBC-\xBF]|\x9C[\x80-\x94])
+LETTER_RNG_328 \xF0\x9D\x9C[\x96-\xB4]
+LETTER_RNG_329 \xF0\x9D(\x9C[\xB6-\xBF]|\x9D[\x80-\x8E])
+LETTER_RNG_330 \xF0\x9D\x9D[\x90-\xAE]
+LETTER_RNG_331 \xF0\x9D(\x9D[\xB0-\xBF]|\x9E[\x80-\x88])
+LETTER_RNG_332 \xF0\x9D\x9E[\x8A-\xA8]
+LETTER_RNG_333 \xF0\x9D(\x9E[\xAA-\xBF]|\x9F[\x80-\x82])
+LETTER_RNG_334 \xF0\x9D\x9F[\x84-\x8B]
+LETTER_RNG_335 \xF0\x9E\x84[\x80-\xAC]
+LETTER_RNG_336 \xF0\x9E\x84[\xB7-\xBD]
+LETTER_RNG_337 \xF0\x9E\x85\x8E
+LETTER_RNG_338 \xF0\x9E\x8B[\x80-\xAB]
+LETTER_RNG_339 \xF0\x9E(\xA3[\x80-\x84]|[\xA0-\xA2][\x80-\xBF])
+LETTER_RNG_340 \xF0\x9E(\xA5[\x80-\x83]|\xA4[\x80-\xBF])
+LETTER_RNG_341 \xF0\x9E\xA5\x8B
+LETTER_RNG_342 \xF0\x9E(\xBA[\x80-\xBB]|[\xB8-\xB9][\x80-\xBF])
+LETTER_RNG_343 \xF0(\xB1\x8D[\x80-\x8A]|(\xB1[\x80-\x8C]|[\xA0-\xB0][\x80-\xBF])[\x80-\xBF])
+
+LETTER_GROUP_1 {LETTER_RNG_1}|{LETTER_RNG_2}|{LETTER_RNG_3}|{LETTER_RNG_4}|{LETTER_RNG_5}|{LETTER_RNG_6}|{LETTER_RNG_7}|{LETTER_RNG_8}|{LETTER_RNG_9}|{LETTER_RNG_10}
+LETTER_GROUP_2 {LETTER_GROUP_1}|{LETTER_RNG_11}|{LETTER_RNG_12}|{LETTER_RNG_13}|{LETTER_RNG_14}|{LETTER_RNG_15}|{LETTER_RNG_16}|{LETTER_RNG_17}|{LETTER_RNG_18}|{LETTER_RNG_19}
+LETTER_GROUP_3 {LETTER_GROUP_2}|{LETTER_RNG_20}|{LETTER_RNG_21}|{LETTER_RNG_22}|{LETTER_RNG_23}|{LETTER_RNG_24}|{LETTER_RNG_25}|{LETTER_RNG_26}|{LETTER_RNG_27}|{LETTER_RNG_28}
+LETTER_GROUP_4 {LETTER_GROUP_3}|{LETTER_RNG_29}|{LETTER_RNG_30}|{LETTER_RNG_31}|{LETTER_RNG_32}|{LETTER_RNG_33}|{LETTER_RNG_34}|{LETTER_RNG_35}|{LETTER_RNG_36}|{LETTER_RNG_37}
+LETTER_GROUP_5 {LETTER_GROUP_4}|{LETTER_RNG_38}|{LETTER_RNG_39}|{LETTER_RNG_40}|{LETTER_RNG_41}|{LETTER_RNG_42}|{LETTER_RNG_43}|{LETTER_RNG_44}|{LETTER_RNG_45}|{LETTER_RNG_46}
+LETTER_GROUP_6 {LETTER_GROUP_5}|{LETTER_RNG_47}|{LETTER_RNG_48}|{LETTER_RNG_49}|{LETTER_RNG_50}|{LETTER_RNG_51}|{LETTER_RNG_52}|{LETTER_RNG_53}|{LETTER_RNG_54}|{LETTER_RNG_55}
+LETTER_GROUP_7 {LETTER_GROUP_6}|{LETTER_RNG_56}|{LETTER_RNG_57}|{LETTER_RNG_58}|{LETTER_RNG_59}|{LETTER_RNG_60}|{LETTER_RNG_61}|{LETTER_RNG_62}|{LETTER_RNG_63}|{LETTER_RNG_64}
+LETTER_GROUP_8 {LETTER_GROUP_7}|{LETTER_RNG_65}|{LETTER_RNG_66}|{LETTER_RNG_67}|{LETTER_RNG_68}|{LETTER_RNG_69}|{LETTER_RNG_70}|{LETTER_RNG_71}|{LETTER_RNG_72}|{LETTER_RNG_73}
+LETTER_GROUP_9 {LETTER_GROUP_8}|{LETTER_RNG_74}|{LETTER_RNG_75}|{LETTER_RNG_76}|{LETTER_RNG_77}|{LETTER_RNG_78}|{LETTER_RNG_79}|{LETTER_RNG_80}|{LETTER_RNG_81}|{LETTER_RNG_82}
+LETTER_GROUP_10 {LETTER_GROUP_9}|{LETTER_RNG_83}|{LETTER_RNG_84}|{LETTER_RNG_85}|{LETTER_RNG_86}|{LETTER_RNG_87}|{LETTER_RNG_88}|{LETTER_RNG_89}|{LETTER_RNG_90}|{LETTER_RNG_91}
+LETTER_GROUP_11 {LETTER_GROUP_10}|{LETTER_RNG_92}|{LETTER_RNG_93}|{LETTER_RNG_94}|{LETTER_RNG_95}|{LETTER_RNG_96}|{LETTER_RNG_97}|{LETTER_RNG_98}|{LETTER_RNG_99}|{LETTER_RNG_100}
+LETTER_GROUP_12 {LETTER_GROUP_11}|{LETTER_RNG_101}|{LETTER_RNG_102}|{LETTER_RNG_103}|{LETTER_RNG_104}|{LETTER_RNG_105}|{LETTER_RNG_106}|{LETTER_RNG_107}|{LETTER_RNG_108}|{LETTER_RNG_109}
+LETTER_GROUP_13 {LETTER_GROUP_12}|{LETTER_RNG_110}|{LETTER_RNG_111}|{LETTER_RNG_112}|{LETTER_RNG_113}|{LETTER_RNG_114}|{LETTER_RNG_115}|{LETTER_RNG_116}|{LETTER_RNG_117}|{LETTER_RNG_118}
+LETTER_GROUP_14 {LETTER_GROUP_13}|{LETTER_RNG_119}|{LETTER_RNG_120}|{LETTER_RNG_121}|{LETTER_RNG_122}|{LETTER_RNG_123}|{LETTER_RNG_124}|{LETTER_RNG_125}|{LETTER_RNG_126}|{LETTER_RNG_127}
+LETTER_GROUP_15 {LETTER_GROUP_14}|{LETTER_RNG_128}|{LETTER_RNG_129}|{LETTER_RNG_130}|{LETTER_RNG_131}|{LETTER_RNG_132}|{LETTER_RNG_133}|{LETTER_RNG_134}|{LETTER_RNG_135}|{LETTER_RNG_136}
+LETTER_GROUP_16 {LETTER_GROUP_15}|{LETTER_RNG_137}|{LETTER_RNG_138}|{LETTER_RNG_139}|{LETTER_RNG_140}|{LETTER_RNG_141}|{LETTER_RNG_142}|{LETTER_RNG_143}|{LETTER_RNG_144}|{LETTER_RNG_145}
+/* like every other LETTER_GROUP_n, this one extends its direct predecessor with the next batch of ranges */
+LETTER_GROUP_17 {LETTER_GROUP_16}|{LETTER_RNG_146}|{LETTER_RNG_147}|{LETTER_RNG_148}|{LETTER_RNG_149}|{LETTER_RNG_150}|{LETTER_RNG_151}|{LETTER_RNG_152}|{LETTER_RNG_153}|{LETTER_RNG_154}
+LETTER_GROUP_18 {LETTER_GROUP_17}|{LETTER_RNG_155}|{LETTER_RNG_156}|{LETTER_RNG_157}|{LETTER_RNG_158}|{LETTER_RNG_159}|{LETTER_RNG_160}|{LETTER_RNG_161}|{LETTER_RNG_162}|{LETTER_RNG_163}
+LETTER_GROUP_19 {LETTER_GROUP_18}|{LETTER_RNG_164}|{LETTER_RNG_165}|{LETTER_RNG_166}|{LETTER_RNG_167}|{LETTER_RNG_168}|{LETTER_RNG_169}|{LETTER_RNG_170}|{LETTER_RNG_171}|{LETTER_RNG_172}
+LETTER_GROUP_20 {LETTER_GROUP_19}|{LETTER_RNG_173}|{LETTER_RNG_174}|{LETTER_RNG_175}|{LETTER_RNG_176}|{LETTER_RNG_177}|{LETTER_RNG_178}|{LETTER_RNG_179}|{LETTER_RNG_180}|{LETTER_RNG_181}
+LETTER_GROUP_21 {LETTER_GROUP_20}|{LETTER_RNG_182}|{LETTER_RNG_183}|{LETTER_RNG_184}|{LETTER_RNG_185}|{LETTER_RNG_186}|{LETTER_RNG_187}|{LETTER_RNG_188}|{LETTER_RNG_189}|{LETTER_RNG_190}
+LETTER_GROUP_22 {LETTER_GROUP_21}|{LETTER_RNG_191}|{LETTER_RNG_192}|{LETTER_RNG_193}|{LETTER_RNG_194}|{LETTER_RNG_195}|{LETTER_RNG_196}|{LETTER_RNG_197}|{LETTER_RNG_198}|{LETTER_RNG_199}
+LETTER_GROUP_23 {LETTER_GROUP_22}|{LETTER_RNG_200}|{LETTER_RNG_201}|{LETTER_RNG_202}|{LETTER_RNG_203}|{LETTER_RNG_204}|{LETTER_RNG_205}|{LETTER_RNG_206}|{LETTER_RNG_207}|{LETTER_RNG_208}
+LETTER_GROUP_24 {LETTER_GROUP_23}|{LETTER_RNG_209}|{LETTER_RNG_210}|{LETTER_RNG_211}|{LETTER_RNG_212}|{LETTER_RNG_213}|{LETTER_RNG_214}|{LETTER_RNG_215}|{LETTER_RNG_216}|{LETTER_RNG_217}
+LETTER_GROUP_25 {LETTER_GROUP_24}|{LETTER_RNG_218}|{LETTER_RNG_219}|{LETTER_RNG_220}|{LETTER_RNG_221}|{LETTER_RNG_222}|{LETTER_RNG_223}|{LETTER_RNG_224}|{LETTER_RNG_225}|{LETTER_RNG_226}
+LETTER_GROUP_26 {LETTER_GROUP_25}|{LETTER_RNG_227}|{LETTER_RNG_228}|{LETTER_RNG_229}|{LETTER_RNG_230}|{LETTER_RNG_231}|{LETTER_RNG_232}|{LETTER_RNG_233}|{LETTER_RNG_234}|{LETTER_RNG_235}
+LETTER_GROUP_27 {LETTER_GROUP_26}|{LETTER_RNG_236}|{LETTER_RNG_237}|{LETTER_RNG_238}|{LETTER_RNG_239}|{LETTER_RNG_240}|{LETTER_RNG_241}|{LETTER_RNG_242}|{LETTER_RNG_243}|{LETTER_RNG_244}
+LETTER_GROUP_28 {LETTER_GROUP_27}|{LETTER_RNG_245}|{LETTER_RNG_246}|{LETTER_RNG_247}|{LETTER_RNG_248}|{LETTER_RNG_249}|{LETTER_RNG_250}|{LETTER_RNG_251}|{LETTER_RNG_252}|{LETTER_RNG_253}
+LETTER_GROUP_29 {LETTER_GROUP_28}|{LETTER_RNG_254}|{LETTER_RNG_255}|{LETTER_RNG_256}|{LETTER_RNG_257}|{LETTER_RNG_258}|{LETTER_RNG_259}|{LETTER_RNG_260}|{LETTER_RNG_261}|{LETTER_RNG_262}
+LETTER_GROUP_30 {LETTER_GROUP_29}|{LETTER_RNG_263}|{LETTER_RNG_264}|{LETTER_RNG_265}|{LETTER_RNG_266}|{LETTER_RNG_267}|{LETTER_RNG_268}|{LETTER_RNG_269}|{LETTER_RNG_270}|{LETTER_RNG_271}
+LETTER_GROUP_31 {LETTER_GROUP_30}|{LETTER_RNG_272}|{LETTER_RNG_273}|{LETTER_RNG_274}|{LETTER_RNG_275}|{LETTER_RNG_276}|{LETTER_RNG_277}|{LETTER_RNG_278}|{LETTER_RNG_279}|{LETTER_RNG_280}
+LETTER_GROUP_32 {LETTER_GROUP_31}|{LETTER_RNG_281}|{LETTER_RNG_282}|{LETTER_RNG_283}|{LETTER_RNG_284}|{LETTER_RNG_285}|{LETTER_RNG_286}|{LETTER_RNG_287}|{LETTER_RNG_288}|{LETTER_RNG_289}
+LETTER_GROUP_33 {LETTER_GROUP_32}|{LETTER_RNG_290}|{LETTER_RNG_291}|{LETTER_RNG_292}|{LETTER_RNG_293}|{LETTER_RNG_294}|{LETTER_RNG_295}|{LETTER_RNG_296}|{LETTER_RNG_297}|{LETTER_RNG_298}
+LETTER_GROUP_34 {LETTER_GROUP_33}|{LETTER_RNG_299}|{LETTER_RNG_300}|{LETTER_RNG_301}|{LETTER_RNG_302}|{LETTER_RNG_303}|{LETTER_RNG_304}|{LETTER_RNG_305}|{LETTER_RNG_306}|{LETTER_RNG_307}
+LETTER_GROUP_35 {LETTER_GROUP_34}|{LETTER_RNG_308}|{LETTER_RNG_309}|{LETTER_RNG_310}|{LETTER_RNG_311}|{LETTER_RNG_312}|{LETTER_RNG_313}|{LETTER_RNG_314}|{LETTER_RNG_315}|{LETTER_RNG_316}
+LETTER_GROUP_36 {LETTER_GROUP_35}|{LETTER_RNG_317}|{LETTER_RNG_318}|{LETTER_RNG_319}|{LETTER_RNG_320}|{LETTER_RNG_321}|{LETTER_RNG_322}|{LETTER_RNG_323}|{LETTER_RNG_324}|{LETTER_RNG_325}
+LETTER_GROUP_37 {LETTER_GROUP_36}|{LETTER_RNG_326}|{LETTER_RNG_327}|{LETTER_RNG_328}|{LETTER_RNG_329}|{LETTER_RNG_330}|{LETTER_RNG_331}|{LETTER_RNG_332}|{LETTER_RNG_333}|{LETTER_RNG_334}
+LETTER_GROUP_38 {LETTER_GROUP_37}|{LETTER_RNG_335}|{LETTER_RNG_336}|{LETTER_RNG_337}|{LETTER_RNG_338}|{LETTER_RNG_339}|{LETTER_RNG_340}|{LETTER_RNG_341}|{LETTER_RNG_342}|{LETTER_RNG_343}
+
+/* second aggregation level: each LETTER_G_GROUP_n adds the next batch of LETTER_GROUP_* definitions to LETTER_G_GROUP_(n-1) */
+LETTER_G_GROUP_1 {LETTER_GROUP_1}|{LETTER_GROUP_2}|{LETTER_GROUP_3}|{LETTER_GROUP_4}|{LETTER_GROUP_5}|{LETTER_GROUP_6}|{LETTER_GROUP_7}|{LETTER_GROUP_8}|{LETTER_GROUP_9}|{LETTER_GROUP_10}
+LETTER_G_GROUP_2 {LETTER_G_GROUP_1}|{LETTER_GROUP_11}|{LETTER_GROUP_12}|{LETTER_GROUP_13}|{LETTER_GROUP_14}|{LETTER_GROUP_15}|{LETTER_GROUP_16}|{LETTER_GROUP_17}|{LETTER_GROUP_18}|{LETTER_GROUP_19}
+LETTER_G_GROUP_3 {LETTER_G_GROUP_2}|{LETTER_GROUP_20}|{LETTER_GROUP_21}|{LETTER_GROUP_22}|{LETTER_GROUP_23}|{LETTER_GROUP_24}|{LETTER_GROUP_25}|{LETTER_GROUP_26}|{LETTER_GROUP_27}|{LETTER_GROUP_28}
+LETTER_G_GROUP_4 {LETTER_G_GROUP_3}|{LETTER_GROUP_29}|{LETTER_GROUP_30}|{LETTER_GROUP_31}|{LETTER_GROUP_32}|{LETTER_GROUP_33}|{LETTER_GROUP_34}|{LETTER_GROUP_35}|{LETTER_GROUP_36}|{LETTER_GROUP_37}
+LETTER_G_GROUP_5 {LETTER_G_GROUP_4}|{LETTER_GROUP_38}
+
+/* any Unicode letter: the union of all LETTER_G_GROUP_* definitions */
+UNICODE_LETTER {LETTER_G_GROUP_1}|{LETTER_G_GROUP_2}|{LETTER_G_GROUP_3}|{LETTER_G_GROUP_4}|{LETTER_G_GROUP_5}
+
+/* Unicode digit ranges (category Nd) */
+/* generated with unicode_range_generator.l */
+/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
+DIGIT_RNG_1 [0-9]
+DIGIT_RNG_2 \xD9[\xA0-\xA9]
+DIGIT_RNG_3 \xDB[\xB0-\xB9]
+DIGIT_RNG_4 \xDF[\x80-\x89]
+DIGIT_RNG_5 \xE0\xA5[\xA6-\xAF]
+DIGIT_RNG_6 \xE0\xA7[\xA6-\xAF]
+DIGIT_RNG_7 \xE0\xA9[\xA6-\xAF]
+DIGIT_RNG_8 \xE0\xAB[\xA6-\xAF]
+DIGIT_RNG_9 \xE0\xAD[\xA6-\xAF]
+DIGIT_RNG_10 \xE0\xAF[\xA6-\xAF]
+DIGIT_RNG_11 \xE0\xB1[\xA6-\xAF]
+DIGIT_RNG_12 \xE0\xB3[\xA6-\xAF]
+DIGIT_RNG_13 \xE0\xB5[\xA6-\xAF]
+DIGIT_RNG_14 \xE0\xB7[\xA6-\xAF]
+DIGIT_RNG_15 \xE0\xB9[\x90-\x99]
+DIGIT_RNG_16 \xE0\xBB[\x90-\x99]
+DIGIT_RNG_17 \xE0\xBC[\xA0-\xA9]
+DIGIT_RNG_18 \xE1\x81[\x80-\x89]
+DIGIT_RNG_19 \xE1\x82[\x90-\x99]
+DIGIT_RNG_20 \xE1\x9F[\xA0-\xA9]
+DIGIT_RNG_21 \xE1\xA0[\x90-\x99]
+DIGIT_RNG_22 \xE1\xA5[\x86-\x8F]
+DIGIT_RNG_23 \xE1\xA7[\x90-\x99]
+DIGIT_RNG_24 \xE1\xAA[\x80-\x99]
+DIGIT_RNG_25 \xE1\xAD[\x90-\x99]
+DIGIT_RNG_26 \xE1\xAE[\xB0-\xB9]
+DIGIT_RNG_27 \xE1\xB1[\x80-\x89]
+DIGIT_RNG_28 \xE1\xB1[\x90-\x99]
+DIGIT_RNG_29 \xEA\x98[\xA0-\xA9]
+DIGIT_RNG_30 \xEA\xA3[\x90-\x99]
+DIGIT_RNG_31 \xEA\xA4[\x80-\x89]
+DIGIT_RNG_32 \xEA\xA7[\x90-\x99]
+DIGIT_RNG_33 \xEA\xA7[\xB0-\xB9]
+DIGIT_RNG_34 \xEA\xA9[\x90-\x99]
+DIGIT_RNG_35 \xEA\xAF[\xB0-\xB9]
+DIGIT_RNG_36 \xEF\xBC[\x90-\x99]
+DIGIT_RNG_37 \xF0\x90\x92[\xA0-\xA9]
+DIGIT_RNG_38 \xF0\x90\xB4[\xB0-\xB9]
+DIGIT_RNG_39 \xF0\x91\x81[\xA6-\xAF]
+DIGIT_RNG_40 \xF0\x91\x83[\xB0-\xB9]
+DIGIT_RNG_41 \xF0\x91\x84[\xB6-\xBF]
+DIGIT_RNG_42 \xF0\x91\x87[\x90-\x99]
+DIGIT_RNG_43 \xF0\x91\x8B[\xB0-\xB9]
+DIGIT_RNG_44 \xF0\x91\x91[\x90-\x99]
+DIGIT_RNG_45 \xF0\x91\x93[\x90-\x99]
+DIGIT_RNG_46 \xF0\x91\x99[\x90-\x99]
+DIGIT_RNG_47 \xF0\x91\x9B[\x80-\x89]
+DIGIT_RNG_48 \xF0\x91\x9C[\xB0-\xB9]
+DIGIT_RNG_49 \xF0\x91\xA3[\xA0-\xA9]
+DIGIT_RNG_50 \xF0\x91\xA5[\x90-\x99]
+DIGIT_RNG_51 \xF0\x91\xB1[\x90-\x99]
+DIGIT_RNG_52 \xF0\x91\xB5[\x90-\x99]
+DIGIT_RNG_53 \xF0\x91\xB6[\xA0-\xA9]
+DIGIT_RNG_54 \xF0\x96\xA9[\xA0-\xA9]
+DIGIT_RNG_55 \xF0\x96\xAD[\x90-\x99]
+DIGIT_RNG_56 \xF0\x9D\x9F[\x8E-\xBF]
+DIGIT_RNG_57 \xF0\x9E\x85[\x80-\x89]
+DIGIT_RNG_58 \xF0\x9E\x8B[\xB0-\xB9]
+DIGIT_RNG_59 \xF0\x9E\xA5[\x90-\x99]
+DIGIT_RNG_60 \xF0\x9F\xAF[\xB0-\xB9]
+
+/* DIGIT_GROUP_1 collects DIGIT_RNG_1..DIGIT_RNG_10; each following group extends the previous one with the next ranges */
+DIGIT_GROUP_1 {DIGIT_RNG_1}|{DIGIT_RNG_2}|{DIGIT_RNG_3}|{DIGIT_RNG_4}|{DIGIT_RNG_5}|{DIGIT_RNG_6}|{DIGIT_RNG_7}|{DIGIT_RNG_8}|{DIGIT_RNG_9}|{DIGIT_RNG_10}
+DIGIT_GROUP_2 {DIGIT_GROUP_1}|{DIGIT_RNG_11}|{DIGIT_RNG_12}|{DIGIT_RNG_13}|{DIGIT_RNG_14}|{DIGIT_RNG_15}|{DIGIT_RNG_16}|{DIGIT_RNG_17}|{DIGIT_RNG_18}
+DIGIT_GROUP_3 {DIGIT_GROUP_2}|{DIGIT_RNG_19}|{DIGIT_RNG_20}|{DIGIT_RNG_21}|{DIGIT_RNG_22}|{DIGIT_RNG_23}|{DIGIT_RNG_24}|{DIGIT_RNG_25}|{DIGIT_RNG_26}
+DIGIT_GROUP_4 {DIGIT_GROUP_3}|{DIGIT_RNG_27}|{DIGIT_RNG_28}|{DIGIT_RNG_29}|{DIGIT_RNG_30}|{DIGIT_RNG_31}|{DIGIT_RNG_32}|{DIGIT_RNG_33}|{DIGIT_RNG_34}
+DIGIT_GROUP_5 {DIGIT_GROUP_4}|{DIGIT_RNG_35}|{DIGIT_RNG_36}|{DIGIT_RNG_37}|{DIGIT_RNG_38}|{DIGIT_RNG_39}|{DIGIT_RNG_40}|{DIGIT_RNG_41}|{DIGIT_RNG_42}
+DIGIT_GROUP_6 {DIGIT_GROUP_5}|{DIGIT_RNG_43}|{DIGIT_RNG_44}|{DIGIT_RNG_45}|{DIGIT_RNG_46}|{DIGIT_RNG_47}|{DIGIT_RNG_48}|{DIGIT_RNG_49}|{DIGIT_RNG_50}
+DIGIT_GROUP_7 {DIGIT_GROUP_6}|{DIGIT_RNG_51}|{DIGIT_RNG_52}|{DIGIT_RNG_53}|{DIGIT_RNG_54}|{DIGIT_RNG_55}|{DIGIT_RNG_56}|{DIGIT_RNG_57}|{DIGIT_RNG_58}
+DIGIT_GROUP_8 {DIGIT_GROUP_7}|{DIGIT_RNG_59}|{DIGIT_RNG_60}
+
+/* any Unicode decimal digit (category Nd): the union of all DIGIT_GROUP_* definitions */
+UNICODE_DIGIT {DIGIT_GROUP_1}|{DIGIT_GROUP_2}|{DIGIT_GROUP_3}|{DIGIT_GROUP_4}|{DIGIT_GROUP_5}|{DIGIT_GROUP_6}|{DIGIT_GROUP_7}|{DIGIT_GROUP_8}
+
+/* Unicode combining mark ranges (categories Mn and Mc) */
+/* generated with unicode_range_generator.l */
+/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
+COMB_MARK_RNG_1 \xCD[\x80-\xAF]|\xCC[\x80-\xBF]
+COMB_MARK_RNG_2 \xD2[\x83-\x87]
+COMB_MARK_RNG_3 \xD6[\x91-\xBD]
+COMB_MARK_RNG_4 \xD6\xBF
+COMB_MARK_RNG_5 \xD7[\x81-\x82]
+COMB_MARK_RNG_6 \xD7[\x84-\x85]
+COMB_MARK_RNG_7 \xD7\x87
+COMB_MARK_RNG_8 \xD8[\x90-\x9A]
+COMB_MARK_RNG_9 \xD9[\x8B-\x9F]
+COMB_MARK_RNG_10 \xD9\xB0
+COMB_MARK_RNG_11 \xDB[\x96-\x9C]
+COMB_MARK_RNG_12 \xDB[\x9F-\xA4]
+COMB_MARK_RNG_13 \xDB[\xA7-\xA8]
+COMB_MARK_RNG_14 \xDB[\xAA-\xAD]
+COMB_MARK_RNG_15 \xDC\x91
+COMB_MARK_RNG_16 \xDC[\xB0-\xBF]|\xDD[\x80-\x8A]
+COMB_MARK_RNG_17 \xDE[\xA6-\xB0]
+COMB_MARK_RNG_18 \xDF[\xAB-\xB3]
+COMB_MARK_RNG_19 \xDF\xBD
+COMB_MARK_RNG_20 \xE0\xA0[\x96-\x99]
+COMB_MARK_RNG_21 \xE0\xA0[\x9B-\xA3]
+COMB_MARK_RNG_22 \xE0\xA0[\xA5-\xA7]
+COMB_MARK_RNG_23 \xE0\xA0[\xA9-\xAD]
+COMB_MARK_RNG_24 \xE0\xA1[\x99-\x9B]
+COMB_MARK_RNG_25 \xE0\xA3[\x93-\xA1]
+COMB_MARK_RNG_26 \xE0(\xA3[\xA3-\xBF]|\xA4[\x80-\x83])
+COMB_MARK_RNG_27 \xE0\xA4[\xBA-\xBC]
+COMB_MARK_RNG_28 \xE0(\xA4[\xBE-\xBF]|\xA5[\x80-\x8F])
+COMB_MARK_RNG_29 \xE0\xA5[\x91-\x97]
+COMB_MARK_RNG_30 \xE0\xA5[\xA2-\xA3]
+COMB_MARK_RNG_31 \xE0\xA6[\x81-\x83]
+COMB_MARK_RNG_32 \xE0\xA6\xBC
+COMB_MARK_RNG_33 \xE0(\xA6[\xBE-\xBF]|\xA7[\x80-\x8D])
+COMB_MARK_RNG_34 \xE0\xA7\x97
+COMB_MARK_RNG_35 \xE0\xA7[\xA2-\xA3]
+COMB_MARK_RNG_36 \xE0(\xA7[\xBE-\xBF]|\xA8[\x80-\x83])
+COMB_MARK_RNG_37 \xE0(\xA8[\xBC-\xBF]|\xA9[\x80-\x91])
+COMB_MARK_RNG_38 \xE0\xA9[\xB0-\xB1]
+COMB_MARK_RNG_39 \xE0\xA9\xB5
+COMB_MARK_RNG_40 \xE0\xAA[\x81-\x83]
+COMB_MARK_RNG_41 \xE0\xAA\xBC
+COMB_MARK_RNG_42 \xE0(\xAA[\xBE-\xBF]|\xAB[\x80-\x8D])
+COMB_MARK_RNG_43 \xE0\xAB[\xA2-\xA3]
+COMB_MARK_RNG_44 \xE0(\xAB[\xBA-\xBF]|\xAC[\x80-\x83])
+COMB_MARK_RNG_45 \xE0\xAC\xBC
+COMB_MARK_RNG_46 \xE0(\xAC[\xBE-\xBF]|\xAD[\x80-\x97])
+COMB_MARK_RNG_47 \xE0\xAD[\xA2-\xA3]
+COMB_MARK_RNG_48 \xE0\xAE\x82
+COMB_MARK_RNG_49 \xE0(\xAE[\xBE-\xBF]|\xAF[\x80-\x8D])
+COMB_MARK_RNG_50 \xE0\xAF\x97
+COMB_MARK_RNG_51 \xE0\xB0[\x80-\x84]
+COMB_MARK_RNG_52 \xE0(\xB0[\xBE-\xBF]|\xB1[\x80-\x96])
+COMB_MARK_RNG_53 \xE0\xB1[\xA2-\xA3]
+COMB_MARK_RNG_54 \xE0\xB2[\x81-\x83]
+COMB_MARK_RNG_55 \xE0\xB2\xBC
+COMB_MARK_RNG_56 \xE0(\xB2[\xBE-\xBF]|\xB3[\x80-\x96])
+COMB_MARK_RNG_57 \xE0\xB3[\xA2-\xA3]
+COMB_MARK_RNG_58 \xE0\xB4[\x80-\x83]
+COMB_MARK_RNG_59 \xE0\xB4[\xBB-\xBC]
+COMB_MARK_RNG_60 \xE0(\xB4[\xBE-\xBF]|\xB5[\x80-\x8D])
+COMB_MARK_RNG_61 \xE0\xB5\x97
+COMB_MARK_RNG_62 \xE0\xB5[\xA2-\xA3]
+COMB_MARK_RNG_63 \xE0\xB6[\x81-\x83]
+COMB_MARK_RNG_64 \xE0\xB7[\x8A-\x9F]
+COMB_MARK_RNG_65 \xE0\xB7[\xB2-\xB3]
+COMB_MARK_RNG_66 \xE0\xB8\xB1
+COMB_MARK_RNG_67 \xE0\xB8[\xB4-\xBA]
+COMB_MARK_RNG_68 \xE0\xB9[\x87-\x8E]
+COMB_MARK_RNG_69 \xE0\xBA\xB1
+COMB_MARK_RNG_70 \xE0\xBA[\xB4-\xBC]
+COMB_MARK_RNG_71 \xE0\xBB[\x88-\x8D]
+COMB_MARK_RNG_72 \xE0\xBC[\x98-\x99]
+COMB_MARK_RNG_73 \xE0\xBC\xB5
+COMB_MARK_RNG_74 \xE0\xBC\xB7
+COMB_MARK_RNG_75 \xE0\xBC\xB9
+COMB_MARK_RNG_76 \xE0\xBC[\xBE-\xBF]
+COMB_MARK_RNG_77 \xE0(\xBD[\xB1-\xBF]|\xBE[\x80-\x84])
+COMB_MARK_RNG_78 \xE0\xBE[\x86-\x87]
+COMB_MARK_RNG_79 \xE0\xBE[\x8D-\xBC]
+COMB_MARK_RNG_80 \xE0\xBF\x86
+COMB_MARK_RNG_81 \xE1\x80[\xAB-\xBE]
+COMB_MARK_RNG_82 \xE1\x81[\x96-\x99]
+COMB_MARK_RNG_83 \xE1\x81[\x9E-\xA0]
+COMB_MARK_RNG_84 \xE1\x81[\xA2-\xA4]
+COMB_MARK_RNG_85 \xE1\x81[\xA7-\xAD]
+COMB_MARK_RNG_86 \xE1\x81[\xB1-\xB4]
+COMB_MARK_RNG_87 \xE1\x82[\x82-\x8D]
+COMB_MARK_RNG_88 \xE1\x82\x8F
+COMB_MARK_RNG_89 \xE1\x82[\x9A-\x9D]
+/* U+135D..U+135F as UTF-8: bytes E1 8D 9D..9F */
+COMB_MARK_RNG_90 \xE1\x8D[\x9D-\x9F]
+COMB_MARK_RNG_91 \xE1\x9C[\x92-\x94]
+COMB_MARK_RNG_92 \xE1\x9C[\xB2-\xB4]
+COMB_MARK_RNG_93 \xE1\x9D[\x92-\x93]
+COMB_MARK_RNG_94 \xE1\x9D[\xB2-\xB3]
+COMB_MARK_RNG_95 \xE1(\x9E[\xB4-\xBF]|\x9F[\x80-\x93])
+COMB_MARK_RNG_96 \xE1\x9F\x9D
+COMB_MARK_RNG_97 \xE1\xA0[\x8B-\x8D]
+COMB_MARK_RNG_98 \xE1\xA2[\x85-\x86]
+COMB_MARK_RNG_99 \xE1\xA2\xA9
+COMB_MARK_RNG_100 \xE1\xA4[\xA0-\xBB]
+COMB_MARK_RNG_101 \xE1\xA8[\x97-\x9B]
+COMB_MARK_RNG_102 \xE1\xA9[\x95-\xBF]
+COMB_MARK_RNG_103 \xE1\xAA[\xB0-\xBD]
+COMB_MARK_RNG_104 \xE1(\xAA\xBF|\xAC[\x80-\x84]|\xAB[\x80-\xBF])
+COMB_MARK_RNG_105 \xE1(\xAC[\xB4-\xBF]|\xAD[\x80-\x84])
+COMB_MARK_RNG_106 \xE1\xAD[\xAB-\xB3]
+COMB_MARK_RNG_107 \xE1\xAE[\x80-\x82]
+COMB_MARK_RNG_108 \xE1\xAE[\xA1-\xAD]
+COMB_MARK_RNG_109 \xE1\xAF[\xA6-\xB3]
+COMB_MARK_RNG_110 \xE1\xB0[\xA4-\xB7]
+COMB_MARK_RNG_111 \xE1\xB3[\x90-\x92]
+COMB_MARK_RNG_112 \xE1\xB3[\x94-\xA8]
+COMB_MARK_RNG_113 \xE1\xB3\xAD
+COMB_MARK_RNG_114 \xE1\xB3\xB4
+COMB_MARK_RNG_115 \xE1\xB3[\xB7-\xB9]
+COMB_MARK_RNG_116 \xE1\xB7[\x80-\xBF]
+COMB_MARK_RNG_117 \xE2\x83[\x90-\x9C]
+COMB_MARK_RNG_118 \xE2\x83\xA1
+COMB_MARK_RNG_119 \xE2\x83[\xA5-\xB0]
+COMB_MARK_RNG_120 \xE2\xB3[\xAF-\xB1]
+COMB_MARK_RNG_121 \xE2\xB5\xBF
+COMB_MARK_RNG_122 \xE2\xB7[\xA0-\xBF]
+COMB_MARK_RNG_123 \xE3\x80[\xAA-\xAF]
+COMB_MARK_RNG_124 \xE3\x82[\x99-\x9A]
+COMB_MARK_RNG_125 \xEA\x99\xAF
+COMB_MARK_RNG_126 \xEA\x99[\xB4-\xBD]
+COMB_MARK_RNG_127 \xEA\x9A[\x9E-\x9F]
+COMB_MARK_RNG_128 \xEA\x9B[\xB0-\xB1]
+COMB_MARK_RNG_129 \xEA\xA0\x82
+COMB_MARK_RNG_130 \xEA\xA0\x86
+COMB_MARK_RNG_131 \xEA\xA0\x8B
+COMB_MARK_RNG_132 \xEA\xA0[\xA3-\xA7]
+COMB_MARK_RNG_133 \xEA\xA0\xAC
+COMB_MARK_RNG_134 \xEA\xA2[\x80-\x81]
+COMB_MARK_RNG_135 \xEA(\xA2[\xB4-\xBF]|\xA3[\x80-\x85])
+COMB_MARK_RNG_136 \xEA\xA3[\xA0-\xB1]
+COMB_MARK_RNG_137 \xEA\xA3\xBF
+COMB_MARK_RNG_138 \xEA\xA4[\xA6-\xAD]
+COMB_MARK_RNG_139 \xEA\xA5[\x87-\x93]
+COMB_MARK_RNG_140 \xEA\xA6[\x80-\x83]
+COMB_MARK_RNG_141 \xEA(\xA6[\xB3-\xBF]|\xA7\x80)
+COMB_MARK_RNG_142 \xEA\xA7\xA5
+COMB_MARK_RNG_143 \xEA\xA8[\xA9-\xB6]
+COMB_MARK_RNG_144 \xEA\xA9\x83
+COMB_MARK_RNG_145 \xEA\xA9[\x8C-\x8D]
+COMB_MARK_RNG_146 \xEA\xA9[\xBB-\xBD]
+COMB_MARK_RNG_147 \xEA\xAA\xB0
+COMB_MARK_RNG_148 \xEA\xAA[\xB2-\xB4]
+COMB_MARK_RNG_149 \xEA\xAA[\xB7-\xB8]
+COMB_MARK_RNG_150 \xEA\xAA[\xBE-\xBF]
+COMB_MARK_RNG_151 \xEA\xAB\x81
+COMB_MARK_RNG_152 \xEA\xAB[\xAB-\xAF]
+COMB_MARK_RNG_153 \xEA\xAB[\xB5-\xB6]
+COMB_MARK_RNG_154 \xEA\xAF[\xA3-\xAA]
+COMB_MARK_RNG_155 \xEA\xAF[\xAC-\xAD]
+COMB_MARK_RNG_156 \xEF\xAC\x9E
+COMB_MARK_RNG_157 \xEF\xB8[\x80-\x8F]
+COMB_MARK_RNG_158 \xEF\xB8[\xA0-\xAF]
+COMB_MARK_RNG_159 \xF0\x90\x87\xBD
+COMB_MARK_RNG_160 \xF0\x90\x8B\xA0
+COMB_MARK_RNG_161 \xF0\x90\x8D[\xB6-\xBA]
+COMB_MARK_RNG_162 \xF0\x90\xA8[\x81-\x8F]
+COMB_MARK_RNG_163 \xF0\x90\xA8[\xB8-\xBF]
+COMB_MARK_RNG_164 \xF0\x90\xAB[\xA5-\xA6]
+COMB_MARK_RNG_165 \xF0\x90\xB4[\xA4-\xA7]
+COMB_MARK_RNG_166 \xF0\x90\xBA[\xAB-\xAC]
+COMB_MARK_RNG_167 \xF0\x90\xBD[\x86-\x90]
+COMB_MARK_RNG_168 \xF0\x91\x80[\x80-\x82]
+COMB_MARK_RNG_169 \xF0\x91(\x80[\xB8-\xBF]|\x81[\x80-\x86])
+COMB_MARK_RNG_170 \xF0\x91(\x81\xBF|\x82[\x80-\x82])
+COMB_MARK_RNG_171 \xF0\x91\x82[\xB0-\xBA]
+COMB_MARK_RNG_172 \xF0\x91\x84[\x80-\x82]
+COMB_MARK_RNG_173 \xF0\x91\x84[\xA7-\xB4]
+COMB_MARK_RNG_174 \xF0\x91\x85[\x85-\x86]
+COMB_MARK_RNG_175 \xF0\x91\x85\xB3
+COMB_MARK_RNG_176 \xF0\x91\x86[\x80-\x82]
+COMB_MARK_RNG_177 \xF0\x91(\x86[\xB3-\xBF]|\x87\x80)
+COMB_MARK_RNG_178 \xF0\x91\x87[\x89-\x8C]
+COMB_MARK_RNG_179 \xF0\x91\x87[\x8E-\x8F]
+COMB_MARK_RNG_180 \xF0\x91\x88[\xAC-\xB7]
+COMB_MARK_RNG_181 \xF0\x91\x88\xBE
+COMB_MARK_RNG_182 \xF0\x91\x8B[\x9F-\xAA]
+COMB_MARK_RNG_183 \xF0\x91\x8C[\x80-\x83]
+COMB_MARK_RNG_184 \xF0\x91\x8C[\xBB-\xBC]
+COMB_MARK_RNG_185 \xF0\x91(\x8C[\xBE-\xBF]|\x8D[\x80-\x8D])
+COMB_MARK_RNG_186 \xF0\x91\x8D\x97
+COMB_MARK_RNG_187 \xF0\x91\x8D[\xA2-\xB4]
+COMB_MARK_RNG_188 \xF0\x91(\x90[\xB5-\xBF]|\x91[\x80-\x86])
+COMB_MARK_RNG_189 \xF0\x91\x91\x9E
+COMB_MARK_RNG_190 \xF0\x91(\x92[\xB0-\xBF]|\x93[\x80-\x83])
+COMB_MARK_RNG_191 \xF0\x91(\x96[\xAF-\xBF]|\x97\x80)
+COMB_MARK_RNG_192 \xF0\x91\x97[\x9C-\x9D]
+COMB_MARK_RNG_193 \xF0\x91(\x98[\xB0-\xBF]|\x99\x80)
+COMB_MARK_RNG_194 \xF0\x91\x9A[\xAB-\xB7]
+COMB_MARK_RNG_195 \xF0\x91\x9C[\x9D-\xAB]
+COMB_MARK_RNG_196 \xF0\x91\xA0[\xAC-\xBA]
+COMB_MARK_RNG_197 \xF0\x91\xA4[\xB0-\xBE]
+COMB_MARK_RNG_198 \xF0\x91\xA5\x80
+COMB_MARK_RNG_199 \xF0\x91\xA5[\x82-\x83]
+COMB_MARK_RNG_200 \xF0\x91\xA7[\x91-\xA0]
+COMB_MARK_RNG_201 \xF0\x91\xA7\xA4
+COMB_MARK_RNG_202 \xF0\x91\xA8[\x81-\x8A]
+COMB_MARK_RNG_203 \xF0\x91\xA8[\xB3-\xB9]
+COMB_MARK_RNG_204 \xF0\x91\xA8[\xBB-\xBE]
+COMB_MARK_RNG_205 \xF0\x91\xA9\x87
+COMB_MARK_RNG_206 \xF0\x91\xA9[\x91-\x9B]
+COMB_MARK_RNG_207 \xF0\x91\xAA[\x8A-\x99]
+COMB_MARK_RNG_208 \xF0\x91\xB0[\xAF-\xBF]
+COMB_MARK_RNG_209 \xF0\x91\xB2[\x92-\xB6]
+COMB_MARK_RNG_210 \xF0\x91(\xB4[\xB1-\xBF]|\xB5[\x80-\x85])
+COMB_MARK_RNG_211 \xF0\x91\xB5\x87
+COMB_MARK_RNG_212 \xF0\x91\xB6[\x8A-\x97]
+COMB_MARK_RNG_213 \xF0\x91\xBB[\xB3-\xB6]
+COMB_MARK_RNG_214 \xF0\x96\xAB[\xB0-\xB4]
+COMB_MARK_RNG_215 \xF0\x96\xAC[\xB0-\xB6]
+COMB_MARK_RNG_216 \xF0\x96\xBD\x8F
+COMB_MARK_RNG_217 \xF0\x96(\xBD[\x91-\xBF]|\xBE[\x80-\x92])
+COMB_MARK_RNG_218 \xF0\x96\xBF[\xA4-\xB1]
+COMB_MARK_RNG_219 \xF0\x9B\xB2[\x9D-\x9E]
+COMB_MARK_RNG_220 \xF0\x9D\x85[\xA5-\xA9]
+COMB_MARK_RNG_221 \xF0\x9D\x85[\xAD-\xB2]
+COMB_MARK_RNG_222 \xF0\x9D(\x85[\xBB-\xBF]|\x86[\x80-\x82])
+COMB_MARK_RNG_223 \xF0\x9D\x86[\x85-\x8B]
+COMB_MARK_RNG_224 \xF0\x9D\x86[\xAA-\xAD]
+COMB_MARK_RNG_225 \xF0\x9D\x89[\x82-\x84]
+COMB_MARK_RNG_226 \xF0\x9D\xA8[\x80-\xB6]
+COMB_MARK_RNG_227 \xF0\x9D(\xA8[\xBB-\xBF]|\xA9[\x80-\xAC])
+COMB_MARK_RNG_228 \xF0\x9D\xA9\xB5
+COMB_MARK_RNG_229 \xF0\x9D\xAA\x84
+COMB_MARK_RNG_230 \xF0(\x9D\xAA[\x9B-\xBF]|\x9E\x80[\x80-\xAA]|\x9D[\xAB-\xBF][\x80-\xBF])
+COMB_MARK_RNG_231 \xF0\x9E\x84[\xB0-\xB6]
+COMB_MARK_RNG_232 \xF0\x9E\x8B[\xAC-\xAF]
+COMB_MARK_RNG_233 \xF0\x9E\xA3[\x90-\x96]
+COMB_MARK_RNG_234 \xF0\x9E\xA5[\x84-\x8A]
+COMB_MARK_RNG_235 \xF3\xA0(\x87[\x80-\xAF]|[\x84-\x86][\x80-\xBF])
+
+/* The range definitions are combined ten (then nine) at a time. */
+/* Every COMB_MARK_GROUP_<n> after the first starts with COMB_MARK_GROUP_<n-1>, */
+/* so the groups are cumulative: COMB_MARK_GROUP_26 covers COMB_MARK_RNG_1 */
+/* through COMB_MARK_RNG_235. */
+COMB_MARK_GROUP_1 {COMB_MARK_RNG_1}|{COMB_MARK_RNG_2}|{COMB_MARK_RNG_3}|{COMB_MARK_RNG_4}|{COMB_MARK_RNG_5}|{COMB_MARK_RNG_6}|{COMB_MARK_RNG_7}|{COMB_MARK_RNG_8}|{COMB_MARK_RNG_9}|{COMB_MARK_RNG_10}
+COMB_MARK_GROUP_2 {COMB_MARK_GROUP_1}|{COMB_MARK_RNG_11}|{COMB_MARK_RNG_12}|{COMB_MARK_RNG_13}|{COMB_MARK_RNG_14}|{COMB_MARK_RNG_15}|{COMB_MARK_RNG_16}|{COMB_MARK_RNG_17}|{COMB_MARK_RNG_18}|{COMB_MARK_RNG_19}
+COMB_MARK_GROUP_3 {COMB_MARK_GROUP_2}|{COMB_MARK_RNG_20}|{COMB_MARK_RNG_21}|{COMB_MARK_RNG_22}|{COMB_MARK_RNG_23}|{COMB_MARK_RNG_24}|{COMB_MARK_RNG_25}|{COMB_MARK_RNG_26}|{COMB_MARK_RNG_27}|{COMB_MARK_RNG_28}
+COMB_MARK_GROUP_4 {COMB_MARK_GROUP_3}|{COMB_MARK_RNG_29}|{COMB_MARK_RNG_30}|{COMB_MARK_RNG_31}|{COMB_MARK_RNG_32}|{COMB_MARK_RNG_33}|{COMB_MARK_RNG_34}|{COMB_MARK_RNG_35}|{COMB_MARK_RNG_36}|{COMB_MARK_RNG_37}
+COMB_MARK_GROUP_5 {COMB_MARK_GROUP_4}|{COMB_MARK_RNG_38}|{COMB_MARK_RNG_39}|{COMB_MARK_RNG_40}|{COMB_MARK_RNG_41}|{COMB_MARK_RNG_42}|{COMB_MARK_RNG_43}|{COMB_MARK_RNG_44}|{COMB_MARK_RNG_45}|{COMB_MARK_RNG_46}
+COMB_MARK_GROUP_6 {COMB_MARK_GROUP_5}|{COMB_MARK_RNG_47}|{COMB_MARK_RNG_48}|{COMB_MARK_RNG_49}|{COMB_MARK_RNG_50}|{COMB_MARK_RNG_51}|{COMB_MARK_RNG_52}|{COMB_MARK_RNG_53}|{COMB_MARK_RNG_54}|{COMB_MARK_RNG_55}
+COMB_MARK_GROUP_7 {COMB_MARK_GROUP_6}|{COMB_MARK_RNG_56}|{COMB_MARK_RNG_57}|{COMB_MARK_RNG_58}|{COMB_MARK_RNG_59}|{COMB_MARK_RNG_60}|{COMB_MARK_RNG_61}|{COMB_MARK_RNG_62}|{COMB_MARK_RNG_63}|{COMB_MARK_RNG_64}
+COMB_MARK_GROUP_8 {COMB_MARK_GROUP_7}|{COMB_MARK_RNG_65}|{COMB_MARK_RNG_66}|{COMB_MARK_RNG_67}|{COMB_MARK_RNG_68}|{COMB_MARK_RNG_69}|{COMB_MARK_RNG_70}|{COMB_MARK_RNG_71}|{COMB_MARK_RNG_72}|{COMB_MARK_RNG_73}
+COMB_MARK_GROUP_9 {COMB_MARK_GROUP_8}|{COMB_MARK_RNG_74}|{COMB_MARK_RNG_75}|{COMB_MARK_RNG_76}|{COMB_MARK_RNG_77}|{COMB_MARK_RNG_78}|{COMB_MARK_RNG_79}|{COMB_MARK_RNG_80}|{COMB_MARK_RNG_81}|{COMB_MARK_RNG_82}
+COMB_MARK_GROUP_10 {COMB_MARK_GROUP_9}|{COMB_MARK_RNG_83}|{COMB_MARK_RNG_84}|{COMB_MARK_RNG_85}|{COMB_MARK_RNG_86}|{COMB_MARK_RNG_87}|{COMB_MARK_RNG_88}|{COMB_MARK_RNG_89}|{COMB_MARK_RNG_90}|{COMB_MARK_RNG_91}
+COMB_MARK_GROUP_11 {COMB_MARK_GROUP_10}|{COMB_MARK_RNG_92}|{COMB_MARK_RNG_93}|{COMB_MARK_RNG_94}|{COMB_MARK_RNG_95}|{COMB_MARK_RNG_96}|{COMB_MARK_RNG_97}|{COMB_MARK_RNG_98}|{COMB_MARK_RNG_99}|{COMB_MARK_RNG_100}
+COMB_MARK_GROUP_12 {COMB_MARK_GROUP_11}|{COMB_MARK_RNG_101}|{COMB_MARK_RNG_102}|{COMB_MARK_RNG_103}|{COMB_MARK_RNG_104}|{COMB_MARK_RNG_105}|{COMB_MARK_RNG_106}|{COMB_MARK_RNG_107}|{COMB_MARK_RNG_108}|{COMB_MARK_RNG_109}
+COMB_MARK_GROUP_13 {COMB_MARK_GROUP_12}|{COMB_MARK_RNG_110}|{COMB_MARK_RNG_111}|{COMB_MARK_RNG_112}|{COMB_MARK_RNG_113}|{COMB_MARK_RNG_114}|{COMB_MARK_RNG_115}|{COMB_MARK_RNG_116}|{COMB_MARK_RNG_117}|{COMB_MARK_RNG_118}
+COMB_MARK_GROUP_14 {COMB_MARK_GROUP_13}|{COMB_MARK_RNG_119}|{COMB_MARK_RNG_120}|{COMB_MARK_RNG_121}|{COMB_MARK_RNG_122}|{COMB_MARK_RNG_123}|{COMB_MARK_RNG_124}|{COMB_MARK_RNG_125}|{COMB_MARK_RNG_126}|{COMB_MARK_RNG_127}
+COMB_MARK_GROUP_15 {COMB_MARK_GROUP_14}|{COMB_MARK_RNG_128}|{COMB_MARK_RNG_129}|{COMB_MARK_RNG_130}|{COMB_MARK_RNG_131}|{COMB_MARK_RNG_132}|{COMB_MARK_RNG_133}|{COMB_MARK_RNG_134}|{COMB_MARK_RNG_135}|{COMB_MARK_RNG_136}
+COMB_MARK_GROUP_16 {COMB_MARK_GROUP_15}|{COMB_MARK_RNG_137}|{COMB_MARK_RNG_138}|{COMB_MARK_RNG_139}|{COMB_MARK_RNG_140}|{COMB_MARK_RNG_141}|{COMB_MARK_RNG_142}|{COMB_MARK_RNG_143}|{COMB_MARK_RNG_144}|{COMB_MARK_RNG_145}
+COMB_MARK_GROUP_17 {COMB_MARK_GROUP_16}|{COMB_MARK_RNG_146}|{COMB_MARK_RNG_147}|{COMB_MARK_RNG_148}|{COMB_MARK_RNG_149}|{COMB_MARK_RNG_150}|{COMB_MARK_RNG_151}|{COMB_MARK_RNG_152}|{COMB_MARK_RNG_153}|{COMB_MARK_RNG_154}
+COMB_MARK_GROUP_18 {COMB_MARK_GROUP_17}|{COMB_MARK_RNG_155}|{COMB_MARK_RNG_156}|{COMB_MARK_RNG_157}|{COMB_MARK_RNG_158}|{COMB_MARK_RNG_159}|{COMB_MARK_RNG_160}|{COMB_MARK_RNG_161}|{COMB_MARK_RNG_162}|{COMB_MARK_RNG_163}
+COMB_MARK_GROUP_19 {COMB_MARK_GROUP_18}|{COMB_MARK_RNG_164}|{COMB_MARK_RNG_165}|{COMB_MARK_RNG_166}|{COMB_MARK_RNG_167}|{COMB_MARK_RNG_168}|{COMB_MARK_RNG_169}|{COMB_MARK_RNG_170}|{COMB_MARK_RNG_171}|{COMB_MARK_RNG_172}
+COMB_MARK_GROUP_20 {COMB_MARK_GROUP_19}|{COMB_MARK_RNG_173}|{COMB_MARK_RNG_174}|{COMB_MARK_RNG_175}|{COMB_MARK_RNG_176}|{COMB_MARK_RNG_177}|{COMB_MARK_RNG_178}|{COMB_MARK_RNG_179}|{COMB_MARK_RNG_180}|{COMB_MARK_RNG_181}
+COMB_MARK_GROUP_21 {COMB_MARK_GROUP_20}|{COMB_MARK_RNG_182}|{COMB_MARK_RNG_183}|{COMB_MARK_RNG_184}|{COMB_MARK_RNG_185}|{COMB_MARK_RNG_186}|{COMB_MARK_RNG_187}|{COMB_MARK_RNG_188}|{COMB_MARK_RNG_189}|{COMB_MARK_RNG_190}
+COMB_MARK_GROUP_22 {COMB_MARK_GROUP_21}|{COMB_MARK_RNG_191}|{COMB_MARK_RNG_192}|{COMB_MARK_RNG_193}|{COMB_MARK_RNG_194}|{COMB_MARK_RNG_195}|{COMB_MARK_RNG_196}|{COMB_MARK_RNG_197}|{COMB_MARK_RNG_198}|{COMB_MARK_RNG_199}
+COMB_MARK_GROUP_23 {COMB_MARK_GROUP_22}|{COMB_MARK_RNG_200}|{COMB_MARK_RNG_201}|{COMB_MARK_RNG_202}|{COMB_MARK_RNG_203}|{COMB_MARK_RNG_204}|{COMB_MARK_RNG_205}|{COMB_MARK_RNG_206}|{COMB_MARK_RNG_207}|{COMB_MARK_RNG_208}
+COMB_MARK_GROUP_24 {COMB_MARK_GROUP_23}|{COMB_MARK_RNG_209}|{COMB_MARK_RNG_210}|{COMB_MARK_RNG_211}|{COMB_MARK_RNG_212}|{COMB_MARK_RNG_213}|{COMB_MARK_RNG_214}|{COMB_MARK_RNG_215}|{COMB_MARK_RNG_216}|{COMB_MARK_RNG_217}
+COMB_MARK_GROUP_25 {COMB_MARK_GROUP_24}|{COMB_MARK_RNG_218}|{COMB_MARK_RNG_219}|{COMB_MARK_RNG_220}|{COMB_MARK_RNG_221}|{COMB_MARK_RNG_222}|{COMB_MARK_RNG_223}|{COMB_MARK_RNG_224}|{COMB_MARK_RNG_225}|{COMB_MARK_RNG_226}
+COMB_MARK_GROUP_26 {COMB_MARK_GROUP_25}|{COMB_MARK_RNG_227}|{COMB_MARK_RNG_228}|{COMB_MARK_RNG_229}|{COMB_MARK_RNG_230}|{COMB_MARK_RNG_231}|{COMB_MARK_RNG_232}|{COMB_MARK_RNG_233}|{COMB_MARK_RNG_234}|{COMB_MARK_RNG_235}
+
+/* The COMB_MARK_G_GROUP_* unions are kept so that existing references still resolve. */
+COMB_MARK_G_GROUP_1 {COMB_MARK_GROUP_1}|{COMB_MARK_GROUP_2}|{COMB_MARK_GROUP_3}|{COMB_MARK_GROUP_4}|{COMB_MARK_GROUP_5}|{COMB_MARK_GROUP_6}|{COMB_MARK_GROUP_7}|{COMB_MARK_GROUP_8}|{COMB_MARK_GROUP_9}|{COMB_MARK_GROUP_10}
+COMB_MARK_G_GROUP_2 {COMB_MARK_G_GROUP_1}|{COMB_MARK_GROUP_11}|{COMB_MARK_GROUP_12}|{COMB_MARK_GROUP_13}|{COMB_MARK_GROUP_14}|{COMB_MARK_GROUP_15}|{COMB_MARK_GROUP_16}|{COMB_MARK_GROUP_17}|{COMB_MARK_GROUP_18}|{COMB_MARK_GROUP_19}
+COMB_MARK_G_GROUP_3 {COMB_MARK_G_GROUP_2}|{COMB_MARK_GROUP_20}|{COMB_MARK_GROUP_21}|{COMB_MARK_GROUP_22}|{COMB_MARK_GROUP_23}|{COMB_MARK_GROUP_24}|{COMB_MARK_GROUP_25}|{COMB_MARK_GROUP_26}
+
+/* Every COMB_MARK_GROUP_<n> already includes COMB_MARK_GROUP_<n-1>, so the last group */
+/* alone matches all combining mark ranges. Referencing it directly avoids expanding the */
+/* same ranges many times over while matching exactly the same input. */
+UNICODE_COMBINING_MARK {COMB_MARK_GROUP_26}
+
+/* Unicode connector punctuation ranges (category Pc) */
+/* generated with unicode_range_generator.l */
+/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
+/* U+005F (ASCII underscore) */
+CONNECTOR_PUNCT_RNG_1 _
+/* U+203F (E2 80 BF) and U+2040 (E2 81 80) */
+CONNECTOR_PUNCT_RNG_2 \xE2(\x80\xBF|\x81\x80)
+/* U+2054 (E2 81 94) */
+CONNECTOR_PUNCT_RNG_3 \xE2\x81\x94
+/* U+FE33..U+FE34 (EF B8 B3..B4) */
+CONNECTOR_PUNCT_RNG_4 \xEF\xB8[\xB3-\xB4]
+/* U+FE4D..U+FE4F (EF B9 8D..8F) */
+CONNECTOR_PUNCT_RNG_5 \xEF\xB9[\x8D-\x8F]
+/* U+FF3F (EF BC BF) */
+CONNECTOR_PUNCT_RNG_6 \xEF\xBC\xBF
+
+/* union of all connector punctuation ranges above */
+UNICODE_CONNECTOR_PUNCTUATION {CONNECTOR_PUNCT_RNG_1}|{CONNECTOR_PUNCT_RNG_2}|{CONNECTOR_PUNCT_RNG_3}|{CONNECTOR_PUNCT_RNG_4}|{CONNECTOR_PUNCT_RNG_5}|{CONNECTOR_PUNCT_RNG_6}
+
+/* zero width non-joiner U+200C and zero width joiner U+200D as UTF-8 byte sequences; */
+/* both are accepted by IDENTIFIER_PART */
+UNICODE_ZWNJ \xE2\x80\x8C
+UNICODE_ZWJ \xE2\x80\x8D
+
+/* Unicode escape sequence */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */
+/* a backslash, the letter u and exactly four hexadecimal digits */
+UNICODE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{4}
+
+/* identifiers */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */
+/* IDENTIFIER_START matches one leading character (or one \uXXXX escape) */
+IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}
+/* IDENTIFIER_PART matches zero or more trailing characters */
+IDENTIFIER_PART (({IDENTIFIER_START})|({UNICODE_COMBINING_MARK})|({UNICODE_DIGIT})|({UNICODE_CONNECTOR_PUNCTUATION})|{UNICODE_ZWNJ}|{UNICODE_ZWJ})*
+/* exactly one start followed by the (possibly empty) part; IDENTIFIER_PART already */
+/* carries the repetition, so no outer repetition is needed and the pattern can never */
+/* match the empty string */
+IDENTIFIER ({IDENTIFIER_START}{IDENTIFIER_PART})
+
+/* literals */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8 */
+/* keyword-like literals are written as quoted strings, single characters as classes */
+LITERAL_NULL "null"
+LITERAL_BOOLEAN "true"|"false"
+/* optional leading dot, digits, optional dot, optional digits, optional e/E, optional digits */
+LITERAL_DECIMAL \.?[0-9]+\.?[0-9]*[eE]?[0-9]*
+/* 0x or 0X followed by any number of hexadecimal digits */
+LITERAL_HEX_INTEGER 0[xX][0-9a-fA-F]*
+/* only the opening quote is matched here; the rule action reads the rest of the string */
+LITERAL_DOUBLE_STRING_BEGIN ["]
+LITERAL_SINGLE_STRING_BEGIN [']
+/* a slash followed by one character that is neither '*' nor '/' */
+LITERAL_REGULAR_EXPRESSION \/[^*/]
+/* extra literals */
+/* according to https://ecma-international.org/ecma-262/5.1/#sec-4.3 */
+LITERAL_UNDEFINED "undefined"
+/* the word Infinity or the bytes E2 88 9E (U+221E) */
+LITERAL_INFINITY "Infinity"|\xE2\x88\x9E
+LITERAL_NAN "NaN"
+/* literals that are matched completely by a pattern (strings and regexes are not) */
+LITERAL {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
+
+/* HTML comment opener; the rules section skips it like a single-line comment */
+HTML_COMMENT_OPEN <!--
+/* script tags are matched case-insensitively ((?i:...)); the opening pattern */
+/* does not include the closing '>' while the closing pattern does */
+TAG_SCRIPT_OPEN (?i:<script)
+TAG_SCRIPT_CLOSE (?i:<\/script>)
+
+/* from 0x000 to 0x10FFFD to match undefined tokens */
+/* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
+/* alternatives in order: one-byte [00-7F]; two-byte C2-DF lead; three-byte E0-EF lead; */
+/* four-byte F0-F4 lead, where the last sequence accepted is F4 8F BF BD */
+ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x80-\xBF])[\x80-\xBF]|\xF4\x8F\xBF[\x80-\xBD]|(\xF4\x8F[\x80-\xBE]|(\xF0[\x90-\xBF]|\xF4[\x80-\x8E]|[\xF1-\xF3][\x80-\xBF])[\x80-\xBF])[\x80-\xBF]
+
+/* match regex literal only if the previous token was of type PUNCTUATOR_3 or KEYWORD */
+/* this resolves an ambiguity with a division operator: var x = 2/2/1; */
+/* exclusive condition; it is entered by the line terminator, KEYWORD and PUNCTUATOR rules */
+/* and by JSTokenizer::init(), and it is the only condition in which */
+/* LITERAL_REGULAR_EXPRESSION is matched */
+%x regex
+
+/* do not match division operators as punctuators if the previous token was of type PUNCTUATOR */
+/* this resolves an ambiguity with regular expression in some cases such as (/=abc=/g) */
+/* exclusive condition; it is entered by the CLOSING_BRACES, OPERATOR, LITERAL, string, */
+/* regex and IDENTIFIER rules, and it is the only condition in which DIV_OPERATOR and */
+/* DIV_ASSIGNMENT_OPERATOR are matched */
+%x div_op
+
+/* Rules section. */
+/* Rules prefixed with <*> are active in every start condition; the <regex> and <div_op> */
+/* rules are active only in their own condition. */
+/* An action that calls one of the eval*() methods stops the scanner with return value 1 */
+/* when that call returns false, after calling update_ptr(). */
+/* JSTokenizer::eval() switches back to INITIAL; the BEGIN() at the end of an action then */
+/* selects the condition for the next token. */
+/* The closing script tag stops the scanner with return value 0 after stepping *ptr back */
+/* by the length of the tag. <<EOF>> stops with return value 0 only when eval_eof() */
+/* returns true; otherwise scanning continues. */
+%%
+<*>{WHITESPACES} { /* skip */ }
+<*>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
+<*>{LINE_TERMINATORS} { BEGIN(regex); }
+<*>{TAG_SCRIPT_OPEN} { if ( !eval(TAG_SCRIPT_OPEN, YYText()) ) { update_ptr(); return 1; } }
+<*>{TAG_SCRIPT_CLOSE} { update_ptr(); *ptr -= YYLeng(); return 0; }
+<*>{HTML_COMMENT_OPEN} { skip_single_line_comment(); }
+<*>{SINGLE_LINE_COMMENT} { skip_single_line_comment(); }
+<*>{MULTI_LINE_COMMENT} { skip_multi_line_comment(); }
+<*>{USE_STRICT_DIRECTIVE} { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
+<*>{KEYWORD} { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
+<*>{CLOSING_BRACES} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
+<div_op>{DIV_OPERATOR}|{DIV_ASSIGNMENT_OPERATOR} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } }
+<*>{PUNCTUATOR} { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
+<*>{OPERATOR} { if ( !eval(OPERATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
+<*>{LITERAL} { if ( !eval(LITERAL, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
+<*>{LITERAL_DOUBLE_STRING_BEGIN} { if ( !eval_string_literal(YYText(), '"') ) { update_ptr(); return 1; } BEGIN(div_op); }
+<*>{LITERAL_SINGLE_STRING_BEGIN} { if ( !eval_string_literal(YYText(), '\'') ) { update_ptr(); return 1; } BEGIN(div_op); }
+<regex>{LITERAL_REGULAR_EXPRESSION} { if ( !eval_regex_literal(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
+<*>{IDENTIFIER} { if ( !eval_identifier(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
+<*>.|{ALL_UNICODE} { if ( !eval(UNDEFINED, YYText()) ) { update_ptr(); return 1; } }
+<<EOF>> { if ( eval_eof() ) { update_ptr(); return 0; } }
+%%
+
+#include <cassert>
+
+// static helper functions
+
+// Encodes a Unicode code point as a UTF-8 byte sequence.
+// code - code point value
+// Returns one to four bytes, or an empty string when code is above 0x10FFFF.
+// Values in the surrogate range (0xD800-0xDFFF) are not rejected; they are
+// encoded like any other three-byte value.
+static std::string unicode_to_utf8(const unsigned int code)
+{
+    std::string res;
+
+    if ( code <= 0x7f )
+        res += static_cast<char>(code);
+    else if ( code <= 0x7ff )
+    {
+        res += static_cast<char>(0xc0 | (code >> 6));
+        res += static_cast<char>(0x80 | (code & 0x3f));
+    }
+    else if ( code <= 0xffff )
+    {
+        res += static_cast<char>(0xe0 | (code >> 12));
+        res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
+        res += static_cast<char>(0x80 | (code & 0x3f));
+    }
+    else if ( code <= 0x10ffff )
+    {
+        // supplementary planes need the four-byte form
+        res += static_cast<char>(0xf0 | (code >> 18));
+        res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
+        res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
+        res += static_cast<char>(0x80 | (code & 0x3f));
+    }
+
+    return res;
+}
+
+// Returns a copy of the lexeme in which every \uXXXX escape is replaced with the
+// UTF-8 bytes produced by unicode_to_utf8() for the code point XXXX.
+// lexeme - NUL-terminated text; must not be null
+// Backslashes themselves are never copied to the result. The four characters that
+// follow "\u" are handed to std::stoi() with base 16.
+static std::string unescape_unicode(const char* lexeme)
+{
+    assert(lexeme);
+
+    const std::string input(lexeme);
+    std::string output;
+    std::string hex_digits;
+
+    bool after_backslash = false;
+    bool in_escape = false;
+    short remaining = 4;
+
+    for ( std::string::size_type i = 0; i < input.size(); ++i )
+    {
+        const char ch = input[i];
+
+        if ( ch == '\\' )
+        {
+            after_backslash = true;
+            continue;
+        }
+
+        if ( after_backslash )
+        {
+            if ( ch == 'u' )
+            {
+                in_escape = true;
+                continue;
+            }
+            after_backslash = false;
+        }
+
+        // ordinary character: copy it through
+        if ( !in_escape )
+        {
+            output += ch;
+            continue;
+        }
+
+        // collecting the digits of an escape
+        hex_digits += ch;
+        if ( --remaining != 0 )
+            continue;
+
+        const unsigned int code_point = std::stoi(hex_digits, nullptr, 16);
+        output += unicode_to_utf8(code_point);
+
+        hex_digits.clear();
+        remaining = 4;
+        in_escape = false;
+    }
+
+    return output;
+}
+
+// JSTokenizer members
+
+// Flex buffers tracked while an unescaped identifier is scanned again.
+struct JSTokenizer::ScanBuffers
+{
+    // buffer that was current when switch_to_temporal() was called
+    YY_BUFFER_STATE initial{nullptr};
+    // buffer created over the unescaped text; nullptr while no such scan is active
+    YY_BUFFER_STATE temporal{nullptr};
+};
+
+// Creates a tokenizer; in and out are handed to the yyFlexLexer base class.
+// dstbuf, dstlen - destination buffer and its size, used by write_output()
+// ptr - caller-owned source pointer, advanced by update_ptr()
+// bytes_copied - caller-owned output counter; must not be null, init() resets it to 0
+JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
+ uint16_t dstlen, const char** ptr, int* bytes_copied)
+ : yyFlexLexer(in, out),
+ dstbuf(dstbuf),
+ dstlen(dstlen),
+ ptr(ptr),
+ bytes_copied(bytes_copied)
+{
+ assert(bytes_copied);
+ init();
+}
+
+// Releases the ScanBuffers object allocated in init().
+// NOTE(review): only the bookkeeping object is deleted here, not the flex buffers it
+// points to - confirm that the base class releases those in every exit path
+JSTokenizer::~JSTokenizer()
+{ delete buffers; }
+
+// Sets up the per-instance state; called from the constructor.
+void JSTokenizer::init()
+{
+    buffers = new ScanBuffers();
+
+    // nothing has been written to the destination buffer yet
+    *bytes_copied = 0;
+
+    // the input may open with a regular expression literal,
+    // so the first token is scanned in the regex start condition
+    BEGIN(regex);
+}
+
+// Makes the scanner read `data` next: loads it into the temporal stream, remembers the
+// buffer that is current now and switches to a new buffer created over the stream.
+// NOTE(review): buffers->initial is overwritten unconditionally; if this can run while a
+// temporal buffer is already active, the pointer to the original buffer would be lost -
+// confirm that nested calls cannot happen
+void JSTokenizer::switch_to_temporal(const std::string& data)
+{
+    temporal.str(data);
+
+    YY_BUFFER_STATE const suspended = YY_CURRENT_BUFFER;
+    YY_BUFFER_STATE const scratch = yy_create_buffer(temporal, data.size());
+
+    buffers->initial = suspended;
+    buffers->temporal = scratch;
+    yy_switch_to_buffer(scratch);
+}
+
+// Ends the temporal scan: deletes the temporal buffer, clears its pointer and
+// resumes scanning from the buffer saved by switch_to_temporal().
+void JSTokenizer::switch_to_initial()
+{
+    yy_delete_buffer(buffers->temporal);
+    buffers->temporal = nullptr;
+
+    yy_switch_to_buffer(buffers->initial);
+}
+
+// Handles an identifier lexeme.
+// Without a "\u" sequence the lexeme is passed to eval() as IDENTIFIER and that result
+// is returned. Otherwise the lexeme is unescaped and queued in a temporal scan buffer
+// so that the scanner matches the unescaped text again; true is returned in that case.
+bool JSTokenizer::eval_identifier(const char* lexeme)
+{
+    const bool has_escape = strstr(lexeme, "\\u") != nullptr;
+
+    if ( !has_escape )
+        return eval(IDENTIFIER, lexeme);
+
+    switch_to_temporal(unescape_unicode(lexeme));
+    return true;
+}
+
+// Reads a string literal delimited by `quotes` from the input and passes it to eval():
+// as LITERAL when parse_literal() accepts it, as UNDEFINED otherwise.
+// match_prefix - the part of the literal the lexer has already matched
+bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
+{
+    std::string literal;
+    const JSToken kind = parse_literal(match_prefix, quotes, literal) ? LITERAL : UNDEFINED;
+
+    return eval(kind, literal.c_str());
+}
+
+// Reads a regular expression literal (delimited by '/') and the flag characters that
+// follow it, then passes the text to eval(): as LITERAL when parse_literal() accepts
+// the literal, as UNDEFINED otherwise.
+// match_prefix - the part of the literal the lexer has already matched
+bool JSTokenizer::eval_regex_literal(const char* match_prefix)
+{
+    static const std::string regex_flags = "gimsuy";
+
+    std::string literal;
+    const bool is_ok = parse_literal(match_prefix, '/', literal, true);
+
+    // collect trailing flags; the first character that is not a flag is pushed back
+    for ( ;; )
+    {
+        const char c = yyinput();
+
+        if ( c == 0 )
+            break;
+
+        if ( regex_flags.find(c) == std::string::npos )
+        {
+            unput(c);
+            break;
+        }
+
+        literal += c;
+    }
+
+    return eval(is_ok ? LITERAL : UNDEFINED, literal.c_str());
+}
+
+// Decides what happens when the current scan buffer reaches its end.
+// Returns true when the scanner should terminate and false when it should keep scanning.
+// Intended to be called from the <<EOF>> handler only; the caller uses the result
+// to decide whether to stop.
+bool JSTokenizer::eval_eof()
+{
+    // no temporal buffer is active: this is the end of the real input
+    if ( !buffers->temporal )
+        return true;
+
+    // the temporal buffer is exhausted: clean it up and resume the initial one
+    switch_to_initial();
+    return false;
+}
+
+// Consumes the rest of a single-line comment, including the line terminator that ends it.
+// Every ECMAScript line terminator ends the comment: LF, CR, LS (U+2028, bytes E2 80 A8)
+// and PS (U+2029, bytes E2 80 A9). Stopping only at LF would hide the code that follows
+// one of the other terminators.
+// Skipping also stops when yyinput() returns 0 or a negative value.
+void JSTokenizer::skip_single_line_comment()
+{
+    int c;
+
+    while ( (c = yyinput()) > 0 )
+    {
+        if ( c == '\n' or c == '\r' )
+            break;
+
+        // LS and PS are three-byte sequences that start with E2 80
+        if ( c != 0xE2 )
+            continue;
+
+        const int second = yyinput();
+        if ( second <= 0 )
+            break;
+        if ( second != 0x80 )
+        {
+            // not a line terminator: read this byte again as ordinary comment text
+            unput(second);
+            continue;
+        }
+
+        const int third = yyinput();
+        if ( third <= 0 or third == 0xA8 or third == 0xA9 )
+            break;
+
+        unput(third);
+    }
+}
+
+// Consumes input up to and including the closing "*/" of a multi-line comment,
+// or until yyinput() returns 0.
+void JSTokenizer::skip_multi_line_comment()
+{
+    for ( char c = yyinput(); c != 0; c = yyinput() )
+    {
+        if ( c != '*' )
+            continue;
+
+        const char next = yyinput();
+        if ( next == '/' )
+            break;
+
+        // not the end of the comment: let the loop look at this character again
+        unput(next);
+    }
+}
+
+// Unicode line terminators
+#define LS "\u2028"
+#define PS "\u2029"
+
+// Delineates and validates a literal read from the input stream:
+// 1. double quotes string literal
+// 2. single quotes string literal
+// 3. regex literal
+// Call this method when the lexer meets the beginning of such a literal.
+// match_prefix - part of the lexeme already matched by the lexer (with sentinel char)
+// sentinel_ch  - character that closes the literal when it is not escaped
+// result       - receives the text of the literal
+// is_regex     - true for a regex literal, in which a line break is never allowed
+// Returns true when the literal is accepted. It is rejected when it contains a CR or
+// LF that is not part of a line continuation, or an LS or PS character.
+// When it is rejected because of a line break, the last character of result is
+// replaced with sentinel_ch.
+bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
+    std::string& result, bool is_regex)
+{
+    bool is_ok = true;
+    char c;
+    // number of consecutive backslashes right before the current character
+    short n = 0;
+
+    // push the matched prefix back so that the whole literal is read with yyinput()
+    for ( auto it = match_prefix.crbegin(); it != match_prefix.crend(); ++it )
+        unput(*it);
+
+    result += yyinput();
+    while ( (c = yyinput()) != 0 )
+    {
+        result += c;
+
+        // the sentinel closes the literal unless an odd number of backslashes precedes it
+        if ( c == sentinel_ch and !( n % 2 ) )
+            break;
+        else if ( c == '\\' )
+        {
+            ++n;
+            continue;
+        }
+        else if ( c == '\r' )
+        {
+            if ( is_regex )
+            {
+                is_ok = false;
+                result = result.substr(0, result.size() - n);
+            }
+            else if ( n == 0 )
+                is_ok = false;
+            else if ( ( (c = yyinput()) != 0 ) and c == '\n' )
+            {
+                // line continuation (backslash, CR, LF): drop the backslash and the CR
+                result = result.substr(0, result.size() - 2);
+                // the backslash has been consumed; it must not escape the next character
+                n = 0;
+                continue;
+            }
+            else
+            {
+                is_ok = false;
+                unput(c);
+            }
+
+            break;
+        }
+        else if ( c == '\n' )
+        {
+            if ( is_regex )
+            {
+                is_ok = false;
+                result = result.substr(0, result.size() - n);
+            }
+            else if ( n == 0 )
+                is_ok = false;
+            else
+            {
+                // line continuation (backslash, LF): drop both characters
+                result = result.substr(0, result.size() - 2);
+                // the backslash has been consumed; it must not escape the next character
+                n = 0;
+                continue;
+            }
+
+            break;
+        }
+
+        n = 0;
+    }
+
+    if ( !is_ok )
+    {
+        result.back() = sentinel_ch;
+        return is_ok;
+    }
+
+    if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
+        is_ok = false;
+
+    return is_ok;
+}
+
+// Hands a lexeme to the normalize_*() method that belongs to its token type, then
+// records the token type as the previous token and returns to the INITIAL start condition.
+// Returns the result of the normalize_*() call; false for a token type that is not handled.
+bool JSTokenizer::eval(const JSToken tok, const char* lexeme)
+{
+    bool accepted = false;
+
+    switch( tok )
+    {
+    case KEYWORD:
+    case LITERAL:
+        accepted = normalize_lexeme(prev_tok, lexeme);
+        break;
+
+    case IDENTIFIER:
+        accepted = normalize_identifier(prev_tok, lexeme);
+        break;
+
+    case PUNCTUATOR:
+        accepted = normalize_punctuator(prev_tok, lexeme);
+        break;
+
+    case OPERATOR:
+        accepted = normalize_operator(prev_tok, lexeme);
+        break;
+
+    case DIRECTIVE:
+        accepted = normalize_directive(prev_tok, lexeme);
+        break;
+
+    case TAG_SCRIPT_OPEN:
+        accepted = normalize_tag_script_open(prev_tok, lexeme);
+        break;
+
+    case UNDEFINED:
+        accepted = normalize_undefined(prev_tok, lexeme);
+        break;
+    }
+
+    // the token just handled becomes the context for the next one
+    prev_tok = tok;
+
+    // set a default pattern match start condition
+    if ( yy_start != INITIAL )
+        BEGIN(INITIAL);
+
+    return accepted;
+}
+
+// Identifiers are written with the same spacing rule as keywords and literals:
+// the call is forwarded to normalize_lexeme() and its result is returned.
+bool JSTokenizer::normalize_identifier(const JSToken prev_tok, const char* lexeme)
+{
+ return normalize_lexeme(prev_tok, lexeme);
+}
+
+// Writes a punctuator as is, whatever the previous token was.
+// Returns the result of write_output().
+bool JSTokenizer::normalize_punctuator(const JSToken, const char* lexeme)
+{
+ return write_output(lexeme);
+}
+
+// Writes an operator. A single space is written first when the previous token was
+// an operator as well; after every other known token type the operator is written as is.
+// Returns the result of write_output(), or false for an unknown previous token type.
+bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
+{
+    bool prepend_space;
+
+    switch( prev_tok )
+    {
+    case OPERATOR:
+        prepend_space = true;
+        break;
+
+    case IDENTIFIER:
+    case KEYWORD:
+    case PUNCTUATOR:
+    case LITERAL:
+    case DIRECTIVE:
+    case TAG_SCRIPT_OPEN:
+    case UNDEFINED:
+        prepend_space = false;
+        break;
+
+    default:
+        return false;
+    }
+
+    if ( !prepend_space )
+        return write_output(lexeme);
+
+    return write_output(" " + std::string(lexeme));
+}
+
+// Writes a directive, appending a semicolon when the lexeme does not contain one.
+// Spacing follows normalize_lexeme(), whose result is returned.
+bool JSTokenizer::normalize_directive(const JSToken prev_tok, const char* lexeme)
+{
+    std::string directive(lexeme);
+
+    if ( directive.find(';') == std::string::npos )
+        directive.push_back(';');
+
+    return normalize_lexeme(prev_tok, directive.c_str());
+}
+
+// Writes an opening script tag as is, whatever the previous token was.
+// Returns the result of write_output().
+bool JSTokenizer::normalize_tag_script_open(const JSToken, const char* lexeme)
+{
+ // FIXIT-L add builtin alert here
+ return write_output(lexeme);
+}
+
+// Writes text that matched no other token type as is, whatever the previous token was.
+// Returns the result of write_output().
+bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
+{ return write_output(lexeme); }
+
+// Writes a lexeme. A single space is written first when the previous token was an
+// identifier, a keyword, a literal or an opening script tag; after a punctuator, an
+// operator, a directive or undefined text the lexeme is written as is.
+// Returns the result of write_output(), or false for an unknown previous token type.
+bool JSTokenizer::normalize_lexeme(const JSToken prev_tok, const char* lexeme)
+{
+    bool prepend_space;
+
+    switch( prev_tok )
+    {
+    case IDENTIFIER:
+    case KEYWORD:
+    case LITERAL:
+    case TAG_SCRIPT_OPEN:
+        prepend_space = true;
+        break;
+
+    case PUNCTUATOR:
+    case OPERATOR:
+    case DIRECTIVE:
+    case UNDEFINED:
+        prepend_space = false;
+        break;
+
+    default:
+        return false;
+    }
+
+    if ( !prepend_space )
+        return write_output(lexeme);
+
+    return write_output(" " + std::string(lexeme));
+}
+
+// Appends str to the destination buffer.
+// Returns false and writes nothing when the total number of bytes would be negative
+// or larger than dstlen; otherwise copies the bytes, advances dstbuf, updates
+// *bytes_copied and returns true.
+bool JSTokenizer::write_output(const std::string& str)
+{
+    const size_t len = str.size();
+    const int new_size = *bytes_copied + len;
+
+    if ( new_size < 0 or new_size > dstlen )
+        return false;
+
+    memcpy(dstbuf, str.data(), len);
+
+    dstbuf += len;
+    *bytes_copied = new_size;
+    return true;
+}
+
+// Advances the caller's source pointer by the current read position of the input stream.
+// NOTE(review): tellg() reports -1 when the stream is in a failed state - confirm that
+// the stream state is always good at the points where this is called
+void JSTokenizer::update_ptr()
+{
+    const auto stream_pos = yyin.tellg();
+    *ptr += stream_pos;
+}
+
--- /dev/null
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation. You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_normalizer_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include <cstring>
+
+#include "utils/js_normalizer.h"
+
+namespace snort
+{
+// Mock for JSTokenizer
+// Stand-in definition of snort::FatalError for this test binary:
+// it ignores its arguments and ends the process with EXIT_FAILURE.
+[[noreturn]] void FatalError(const char*, ...)
+{ exit(EXIT_FAILURE); }
+}
+
+using namespace snort;
+
+// Normalization depth passed to JSNormalizer::normalize() by NORMALIZE.
+#define NORM_DEPTH 65535
+
+// Runs the normalizer over srcbuf. Declares dstbuf (sized like expected), bytes_copied,
+// ptr, norm_depth and ret in the enclosing scope, so it can be used once per scope.
+// Both arguments must be char arrays, because their sizes are taken with sizeof;
+// the source size therefore includes the terminating NUL.
+#define NORMALIZE(srcbuf, expected) \
+ char dstbuf[sizeof(expected)]; \
+ int bytes_copied; \
+ const char* ptr = srcbuf; \
+ int norm_depth = NORM_DEPTH; \
+ int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), \
+ dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, norm_depth);
+
+// Checks the variables declared by NORMALIZE in the same scope: the call returned 0,
+// ptr moved past the whole source array, and the output equals expected without its
+// terminating NUL.
+#define VALIDATE(srcbuf, expected) \
+ CHECK(ret == 0); \
+ CHECK((ptr - srcbuf) == sizeof(srcbuf)); \
+ CHECK(bytes_copied == sizeof(expected) - 1); \
+ CHECK(!memcmp(dstbuf, expected, bytes_copied));
+
+// ClamAV test cases
+// case 0: function body with line breaks, spaces, a single-line comment and two
+// multi-line comments; the expected text has the comments and the spacing removed
+static const char clamav_buf0[] =
+ "function foo(a, b) {\n"
+ "var x = 1.9e2*2*a/ 4.;\n"
+ "var y = 'test\\'tst';//var\n"
+ "x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"
+ "z=x/y;/* multiline oneline */var t=z/a;\n"
+ "z=[test,testi];"
+ "document.writeln('something\\n');}";
+
+static const char clamav_expected0[] =
+ "function foo(a,b){var x=1.9e2*2*a/4.;var y='test\\'tst';x=b[5],z=x/y;var t=z/a;"
+ "z=[test,testi];document.writeln('something\\n');}";
+
+// case 1: identifier with a \u1234 escape; the expected text holds the character itself
+static const char clamav_buf1[] =
+ "function () { var id\\u1234tx;}";
+
+static const char clamav_expected1[] =
+ "function(){var id\u1234tx;}";
+
+// case 2: concatenated string literals; only the spacing between tokens changes
+static const char clamav_buf2[] =
+ "function () { var tst=\"a\"+'bc'+ 'd'; }";
+
+static const char clamav_expected2[] =
+ "function(){var tst=\"a\"+'bc'+'d';}";
+
+// case 3: call with a percent-encoded string argument; expected to come out unchanged
+static const char clamav_buf3[] =
+ "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
+
+static const char clamav_expected3[] =
+ "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
+
+// filler run of 'b' characters spliced into the string literals of cases 7 and 8
+#define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+
+// case 4 input: two calls with long percent-encoded string arguments.
+// Read-only like the other fixtures; the test only reads it.
+static const char clamav_buf4[] =
+    "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
+    "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
+    "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
+    "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
+    "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
+    "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
+    "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
+    "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
+    "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
+    "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
+
+// case 4 expected output: the same text as the input.
+// Read-only like the other fixtures; the test only reads it.
+static const char clamav_expected4[] =
+    "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
+    "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
+    "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
+    "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
+    "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
+    "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
+    "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
+    "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
+    "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
+    "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
+
+// case 5 input: call expression with nested quotes and a space before the parameter list.
+// Read-only like the other fixtures; the test only reads it.
+static const char clamav_buf5[] =
+    "shapgvba (c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
+
+// case 5 expected output: the space before the parameter list is removed
+static const char clamav_expected5[] =
+ "shapgvba(c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
+
+// case 6: function named $; the space after the keyword stays, the spaces
+// between statements are removed
+static const char clamav_buf6[] =
+ "function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
+
+static const char clamav_expected6[] =
+ "function $(p,a,c,k,e,d){}something();$('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
+
+// case 7: long double-quoted string literal; expected to come out unchanged
+static const char clamav_buf7[] =
+ "var z=\"tst" B64 "tst\";";
+
+static const char clamav_expected7[] =
+ "var z=\"tst" B64 "tst\";";
+
+// case 8: long single-quoted string literal; expected to come out unchanged
+static const char clamav_buf8[] =
+ "var z=\'tst" B64 "tst\';";
+
+static const char clamav_expected8[] =
+ "var z=\'tst" B64 "tst\';";
+
+static const char clamav_buf9[] = // input for test_case_9; const like the other read-only fixtures
+ "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
+
+static const char clamav_expected9[] = // expected output for clamav_buf9; same text as the input
+ "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
+
+static const char clamav_buf10[] = // input for test_case_10
+ "function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";
+
+static const char clamav_expected10[] = // expected output for clamav_buf10: spaces next to punctuators are gone, spaces between words remain
+ "function $ $()dF(x);function(p,a,c,k,e,r){function $(){}";
+
+static const char clamav_buf11[] = // input for test_case_11: whitespace before the terminating ';'
+ "var x=123456789 ;";
+
+static const char clamav_expected11[] = // expected output for clamav_buf11: the space before ';' is gone
+ "var x=123456789;";
+
+static const char clamav_buf12[] = // input for test_case_12: literal backslash-u0000 inside a single-quoted string
+ "var x='test\\u0000test';";
+
+static const char clamav_expected12[] = // expected output for clamav_buf12; same text as the input
+ "var x='test\\u0000test';";
+
+static const char clamav_buf13[] = // input for test_case_13: a backslash directly after an identifier
+ "var x\\s12345";
+
+static const char clamav_expected13[] = // expected output for clamav_buf13; same text as the input
+ "var x\\s12345";
+
+static const char clamav_buf14[] = // input for test_case_14: ends inside an unterminated single-quoted string
+ "document.write(unescape('test%20test";
+
+static const char clamav_expected14[] = // expected output for clamav_buf14; same text as the input
+ "document.write(unescape('test%20test";
+
+TEST_CASE("clamav tests", "[JSNormalizer]") // section N passes clamav_bufN and clamav_expectedN to NORMALIZE, then to VALIDATE
+{
+ SECTION("test_case_0")
+ {
+ NORMALIZE(clamav_buf0, clamav_expected0);
+ VALIDATE(clamav_buf0, clamav_expected0);
+ }
+ SECTION("test_case_1")
+ {
+ NORMALIZE(clamav_buf1, clamav_expected1);
+ VALIDATE(clamav_buf1, clamav_expected1);
+ }
+ SECTION("test_case_2")
+ {
+ NORMALIZE(clamav_buf2, clamav_expected2);
+ VALIDATE(clamav_buf2, clamav_expected2);
+ }
+ SECTION("test_case_3")
+ {
+ NORMALIZE(clamav_buf3, clamav_expected3);
+ VALIDATE(clamav_buf3, clamav_expected3);
+ }
+ SECTION("test_case_4")
+ {
+ NORMALIZE(clamav_buf4, clamav_expected4);
+ VALIDATE(clamav_buf4, clamav_expected4);
+ }
+ SECTION("test_case_5") // space between the first word and '(' is expected to be dropped
+ {
+ NORMALIZE(clamav_buf5, clamav_expected5);
+ VALIDATE(clamav_buf5, clamav_expected5);
+ }
+ SECTION("test_case_6")
+ {
+ NORMALIZE(clamav_buf6, clamav_expected6);
+ VALIDATE(clamav_buf6, clamav_expected6);
+ }
+ SECTION("test_case_7") // double-quoted string with the B64 payload; expected text equals the input text
+ {
+ NORMALIZE(clamav_buf7, clamav_expected7);
+ VALIDATE(clamav_buf7, clamav_expected7);
+ }
+ SECTION("test_case_8") // single-quoted variant of test_case_7
+ {
+ NORMALIZE(clamav_buf8, clamav_expected8);
+ VALIDATE(clamav_buf8, clamav_expected8);
+ }
+ SECTION("test_case_9") // expected text equals the input text
+ {
+ NORMALIZE(clamav_buf9, clamav_expected9);
+ VALIDATE(clamav_buf9, clamav_expected9);
+ }
+ SECTION("test_case_10")
+ {
+ NORMALIZE(clamav_buf10, clamav_expected10);
+ VALIDATE(clamav_buf10, clamav_expected10);
+ }
+ SECTION("test_case_11") // whitespace before ';' is expected to be dropped
+ {
+ NORMALIZE(clamav_buf11, clamav_expected11);
+ VALIDATE(clamav_buf11, clamav_expected11);
+ }
+ SECTION("test_case_12") // expected text equals the input text
+ {
+ NORMALIZE(clamav_buf12, clamav_expected12);
+ VALIDATE(clamav_buf12, clamav_expected12);
+ }
+ SECTION("test_case_13") // expected text equals the input text
+ {
+ NORMALIZE(clamav_buf13, clamav_expected13);
+ VALIDATE(clamav_buf13, clamav_expected13);
+ }
+ SECTION("test_case_14") // input stops inside an open string literal; expected text equals the input text
+ {
+ NORMALIZE(clamav_buf14, clamav_expected14);
+ VALIDATE(clamav_buf14, clamav_expected14);
+ }
+}
+
+// Test cases for all match patterns
+static const char all_patterns_buf0[] = // input for SECTION("whitespaces and special characters")
+ "var \x9\xB\xC\x20\xA0\x8\xA\xD\xEF\xBB\xBF\xE2\x80\xA8\xE2\x80\xA9\n" // raw TAB VT FF SP NBSP BS LF CR BOM LS PS bytes, as named in the tokenizer grammar
+ " \n\t\r\v a; \0"; // explicit \0, so the array ends with two NUL bytes
+
+static const char all_patterns_expected0[] = // expected output for all_patterns_buf0
+ "var a;";
+
+static const char all_patterns_buf1[] = // input for SECTION("comments")
+ "<!-- var html_comment = 'comment' ;\n" // whole "<!--" line is absent from the expected output
+ "var a = 1;// first var\nvar b = 2; /* second var\nvar foo = 'bar'\n*/"
+ "\nvar c = 3; // third var";
+
+static const char all_patterns_expected1[] = // expected output for all_patterns_buf1: only the three statements remain
+ "var a=1;var b=2;var c=3;";
+
+static const char all_patterns_buf2[] = // input for SECTION("punctuators")
+ "{ a } ( a ) [ a ] a >= b a == b a != b a === b a !== b a /= b . ; , "
+ "a < b a > b a <= b a + b- c a * b a % b a ++; --b a << 2 a >> 3 a >>> 4 a & b a | b "
+ "a ^ b ! a a && b a || b ?: a = 2 a += 2 a -= 2 a *= 2 a %= 2 a <<= b a >>= b a >>>= b "
+ "a &= b a|= b a ^= b a/b ~ a";
+
+static const char all_patterns_expected2[] = // expected output for all_patterns_buf2: a space survives only between two adjacent identifiers/numbers
+ "{a}(a)[a]a>=b a==b a!=b a===b a!==b a/=b.;,a<b a>b a<=b a+b-c a*b "
+ "a%b a++;--b a<<2 a>>3 a>>>4 a&b a|b a^b!a a&&b a||b?:a=2 a+=2 a-=2 a*=2 a%=2 a<<=b "
+ "a>>=b a>>>=b a&=b a|=b a^=b a/b~a";
+
+static const char all_patterns_buf3[] = // input for SECTION("keywords")
+ "break case debugger in import protected do else function try "
+ "implements static instanceof new this class let typeof var with enum private catch "
+ "continue default extends public finally for if super yield return switch throw const "
+ "interface void while delete export package";
+
+static const char all_patterns_expected3[] = // expected output for all_patterns_buf3; same text as the input
+ "break case debugger in import protected do else function try "
+ "implements static instanceof new this class let typeof var with enum private catch "
+ "continue default extends public finally for if super yield return switch throw const "
+ "interface void while delete export package";
+
+static const char all_patterns_buf4[] = // input for SECTION("literals")
+ "/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4. +2 -2 "
+ "+3.3 -3.3 +23 -32 2.3E45 3.E34 -2.3E45 -3.E34 +2.3E45 +3.E34 0x1234 0XFFFF Infinity "
+ "\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
+ "x=2/2/1";
+
+static const char all_patterns_expected4[] = // expected output for all_patterns_buf4: spaces before a leading '+' or '-' are gone
+ "/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4.+2-2"
+ "+3.3-3.3+23-32 2.3E45 3.E34-2.3E45-3.E34+2.3E45+3.E34 0x1234 0XFFFF Infinity "
+ "\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
+ "x=2/2/1";
+
+static const char all_patterns_buf5[] = // input for SECTION("identifiers"); the \\uXXXX sequences are literal backslash-u text
+ "$2abc _2abc abc $__$ 肖晗 XÆA12 \\u0041abc \\u00FBdef \\u1234ghi ab\xE2\x80\xA8ww "
+ "ab\xE2\x80\xA9ww ab\xEF\xBB\xBFww ab∞ww 2abc";
+
+static const char all_patterns_expected5[] = // expected output for all_patterns_buf5; here \uXXXX are C++ universal character names, i.e. the decoded characters
+ "$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab ww "
+ "ab ww ab ww ab ∞ ww 2 abc";
+
+static const char all_patterns_buf6[] = // input for SECTION("tag script open")
+ "var a = 1;\n"
+ "<script>\n"
+ "<script var>\n"
+ "var b = 2 ;\n";
+
+static const char all_patterns_expected6[] = // expected output for all_patterns_buf6: both opening tags are kept in the output
+ "var a=1;<script><script var>var b=2;";
+
+TEST_CASE("all patterns", "[JSNormalizer]") // one SECTION per token class, fed from the all_patterns_* fixtures
+{
+ SECTION("whitespaces and special characters")
+ {
+ NORMALIZE(all_patterns_buf0, all_patterns_expected0);
+ VALIDATE(all_patterns_buf0, all_patterns_expected0);
+ }
+ SECTION("comments")
+ {
+ NORMALIZE(all_patterns_buf1, all_patterns_expected1);
+ VALIDATE(all_patterns_buf1, all_patterns_expected1);
+ }
+ SECTION("directives") // calls JSNormalizer::normalize directly because several inputs share one expected string
+ {
+ const char srcbuf0[] = "'use strict'\nvar a = 1;"; // directive ended by a newline
+ const char srcbuf1[] = "\"use strict\"\nvar a = 1;";
+ const char srcbuf2[] = "'use strict';var a = 1;"; // directive ended by an explicit ';'
+ const char srcbuf3[] = "\"use strict\";var a = 1;";
+ const char srcbuf4[] = "var a = 1 'use strict';"; // same string, but not at the start of the input
+ const char expected0[] = "'use strict';var a=1;"; // shared by srcbuf0 and srcbuf2
+ const char expected1[] = "\"use strict\";var a=1;"; // shared by srcbuf1 and srcbuf3
+ const char expected2[] = "var a=1 'use strict';"; // for srcbuf4
+ char dstbuf0[sizeof(expected0)];
+ char dstbuf1[sizeof(expected1)];
+ char dstbuf2[sizeof(expected0)];
+ char dstbuf3[sizeof(expected1)];
+ char dstbuf4[sizeof(expected2)];
+ int bytes_copied0, bytes_copied1, bytes_copied2, bytes_copied3, bytes_copied4;
+ const char* ptr0 = srcbuf0;
+ const char* ptr1 = srcbuf1;
+ const char* ptr2 = srcbuf2;
+ const char* ptr3 = srcbuf3;
+ const char* ptr4 = srcbuf4;
+ int norm_depth = NORM_DEPTH;
+
+ int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
+ &ptr0, &bytes_copied0, norm_depth);
+ int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
+ &ptr1, &bytes_copied1, norm_depth);
+ int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
+ &ptr2, &bytes_copied2, norm_depth);
+ int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
+ &ptr3, &bytes_copied3, norm_depth);
+ int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
+ &ptr4, &bytes_copied4, norm_depth);
+
+ CHECK(ret0 == 0);
+ CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0)); // pointer is expected one past the array, terminating NUL included
+ CHECK(bytes_copied0 == sizeof(expected0) - 1); // expected length without its terminating NUL
+ CHECK(!memcmp(dstbuf0, expected0, bytes_copied0));
+
+ CHECK(ret1 == 0);
+ CHECK((ptr1 - srcbuf1) == sizeof(srcbuf1));
+ CHECK(bytes_copied1 == sizeof(expected1) - 1);
+ CHECK(!memcmp(dstbuf1, expected1, bytes_copied1));
+
+ CHECK(ret2 == 0);
+ CHECK((ptr2 - srcbuf2) == sizeof(srcbuf2));
+ CHECK(bytes_copied2 == sizeof(expected0) - 1);
+ CHECK(!memcmp(dstbuf2, expected0, bytes_copied2));
+
+ CHECK(ret3 == 0);
+ CHECK((ptr3 - srcbuf3) == sizeof(srcbuf3));
+ CHECK(bytes_copied3 == sizeof(expected1) - 1);
+ CHECK(!memcmp(dstbuf3, expected1, bytes_copied3));
+
+ CHECK(ret4 == 0);
+ CHECK((ptr4 - srcbuf4) == sizeof(srcbuf4));
+ CHECK(bytes_copied4 == sizeof(expected2) - 1);
+ CHECK(!memcmp(dstbuf4, expected2, bytes_copied4));
+ }
+ SECTION("punctuators")
+ {
+ NORMALIZE(all_patterns_buf2, all_patterns_expected2);
+ VALIDATE(all_patterns_buf2, all_patterns_expected2);
+ }
+ SECTION("keywords")
+ {
+ NORMALIZE(all_patterns_buf3, all_patterns_expected3);
+ VALIDATE(all_patterns_buf3, all_patterns_expected3);
+ }
+ SECTION("literals")
+ {
+ NORMALIZE(all_patterns_buf4, all_patterns_expected4);
+ VALIDATE(all_patterns_buf4, all_patterns_expected4);
+ }
+ SECTION("identifiers")
+ {
+ NORMALIZE(all_patterns_buf5, all_patterns_expected5);
+ VALIDATE(all_patterns_buf5, all_patterns_expected5);
+ }
+ SECTION("tag script open")
+ {
+ NORMALIZE(all_patterns_buf6, all_patterns_expected6);
+ VALIDATE(all_patterns_buf6, all_patterns_expected6);
+ }
+}
+
+// Tests for different syntax cases
+static const char syntax_cases_buf0[] = // input for SECTION("variables")
+ "var a;\n"
+ "var b = \"init this stuff\";\n"
+ "var c = \"Hi\" + \" \" + \"Joe\";\n"
+ "var d = 1 + 2 + \"3\";\n"
+ "var e = [ 2, 3, 5, 8 ];\n"
+ "var f = false;\n"
+ "var g = /( i'm a .* regex )/;\n"
+ "var h = function(){};\n"
+ "const PI = 3.14;\n"
+ "var a = 1, b = 2, c = a + b;\n"
+ "let z = 'zzz zz';\n"
+ "var g = null;\n"
+ "var name = { first: \"Jane\", last: \"Doe\" };\n"
+ "var esc = 'I don\\'t \\n know';\n";
+
+static const char syntax_cases_expected0[] = // expected output for syntax_cases_buf0: spaces inside string and regex literals are kept
+ "var a;var b=\"init this stuff\";var c=\"Hi\"+\" \"+\"Joe\";"
+ "var d=1+2+\"3\";var e=[2,3,5,8];var f=false;var g=/( i'm a .* regex )/;"
+ "var h=function(){};const PI=3.14;var a=1,b=2,c=a+b;let z='zzz zz';var g=null;"
+ "var name={first:\"Jane\",last:\"Doe\"};var esc='I don\\'t \\n know';";
+
+static const char syntax_cases_buf1[] = // input for SECTION("operators")
+ "a = b + c - d;\n"
+ "a = b * (c / d);\n"
+ "x = 100 % 48;\n"
+ "a ++; b -- ; -- a; ++ b;\n";
+
+static const char syntax_cases_expected1[] = // expected output for syntax_cases_buf1
+ "a=b+c-d;a=b*(c/d);x=100%48;a++;b--;--a;++b;";
+
+static const char syntax_cases_buf2[] = // input for SECTION("arithmetic and logical operators")
+ "!(a == b);\n"
+ "a != b;\n"
+ "typeof a;\n"
+ "x << 2; x >> 3;\n"
+ "a = b;\n"
+ "a == b;\n"
+ "a != b;\n"
+ "a === b;\n"
+ "a !== b;\n"
+ "a < b; a > b;\n"
+ "a <= b; a >= b;\n"
+ "a += b;\n"
+ "a && b;\n"
+ "a || b;\n";
+
+static const char syntax_cases_expected2[] = // expected output for syntax_cases_buf2
+ "!(a==b);a!=b;typeof a;x<<2;x>>3;a=b;a==b;a!=b;a===b;a!==b;a<b;a>b;"
+ "a<=b;a>=b;a+=b;a&&b;a||b;";
+
+static const char syntax_cases_buf3[] = // input for SECTION("complex object")
+ "var foo = {\n"
+ "firstFoo: \"FooFirst\",\n"
+ "secondFoo: \"FooSecond\",\n"
+ "thirdFoo: 10,\n"
+ "fourthFoo: 120,\n"
+ "methodFoo : function () {\n"
+ "\treturn this.firstFoo + \" \" + this.secondFoo;\n"
+ "}\n"
+ "};\n";
+
+static const char syntax_cases_expected3[] = // expected output for syntax_cases_buf3
+ "var foo={firstFoo:\"FooFirst\",secondFoo:\"FooSecond\","
+ "thirdFoo:10,fourthFoo:120,methodFoo:function(){return this.firstFoo+\" \"+"
+ "this.secondFoo;}};";
+
+static const char syntax_cases_buf4[] = // input for SECTION("arrays")
+ "var dogs = [\"Bulldog\", \"Beagle\", \"Labrador\"];\n"
+ "var dogs = new Array(\"Bulldog\", \"Beagle\", \"Labrador\");\n"
+ "\t\t\t\n"
+ "alert( dogs[ 1 ] );\n"
+ "dogs[0] = \"Bull Terrier\";\n"
+ "\n"
+ "for (var i = 0; i < dogs.length; i++) {\n"
+ "console.log(dogs[i]);\n"
+ "}\n\r";
+
+static const char syntax_cases_expected4[] = // expected output for syntax_cases_buf4
+ "var dogs=[\"Bulldog\",\"Beagle\",\"Labrador\"];"
+ "var dogs=new Array(\"Bulldog\",\"Beagle\",\"Labrador\");alert(dogs[1]);"
+ "dogs[0]=\"Bull Terrier\";for(var i=0;i<dogs.length;i++){console.log(dogs[i]);}";
+
+static const char syntax_cases_buf5[] = // input for SECTION("loops")
+ "var i = 1;\n"
+ "while (i < 100) {\n"
+ "i *= 2;\n"
+ "document.write(i + \", \");\n"
+ "}\n"
+ "\n"
+ "i = 1;\n"
+ "do {\n"
+ "i *= 2;\n"
+ "document.write(i + \", \");\n"
+ "} while (i < 100)\n"
+ "\n"
+ "for (var i = 0; i < 10; i++) {\n"
+ "if (i == 5) { break; }\n"
+ "document.write(i + \", \");\n"
+ "}\n"
+ "\n"
+ "for (var i = 0; i < 10; i++) {\n"
+ "if (i == 5) { continue; }\n"
+ "document.write(i + \", \");\n"
+ "}\n\r";
+
+static const char syntax_cases_expected5[] = // expected output for syntax_cases_buf5
+ "var i=1;while(i<100){i*=2;document.write(i+\", \");}i=1;do{i*=2;"
+ "document.write(i+\", \");}while(i<100)for(var i=0;i<10;i++){if(i==5){break;}"
+ "document.write(i+\", \");}for(var i=0;i<10;i++){if(i==5){continue;}"
+ "document.write(i+\", \");}";
+
+static const char syntax_cases_buf6[] = // input for SECTION("if-else and switch statements")
+ "var n = 1800;\n"
+ "var res;\n"
+ "if ( (n >= 1400) && (n < 1900) ) {\n"
+ "res = \"In range.\";\n"
+ "} else {\n"
+ "res = \"Not in range.\";\n"
+ "}\n"
+ "\n"
+ "var text;\n"
+ "switch ( new Date().getDay() ) {\n"
+ "case 6:\n"
+ "text = \"Saturday\";\n"
+ "break;\n"
+ "case 0:\n"
+ "text = \"Sunday\";\n"
+ "break;\n"
+ "default:\n"
+ "text = \"Whatever\";\n"
+ "}\n\r";
+
+static const char syntax_cases_expected6[] = // expected output for syntax_cases_buf6
+ "var n=1800;var res;if((n>=1400)&&(n<1900)){res=\"In range.\";}"
+ "else{res=\"Not in range.\";}var text;switch(new Date().getDay()){case 6:"
+ "text=\"Saturday\";break;case 0:text=\"Sunday\";break;default:text=\"Whatever\";}";
+
+static const char syntax_cases_buf7[] = // input for SECTION("try-catch statements")
+ "var x = document.getElementById(\"mynum\").value;\n"
+ "try { \n"
+ "if(x == \"\") throw \"empty\";\n"
+ "if(isNaN(x)) throw \"not a number\";\n"
+ "x = Number(x);\n"
+ "if(x > 10) throw \"too high\";\n"
+ "}\n"
+ "catch(err) {\n"
+ "document.write(\"Input is \" + err);\n"
+ "console.error(err);\n"
+ "}\n"
+ "finally {\n"
+ "document.write(\"</br />Done\");\n"
+ "}\n\r";
+
+static const char syntax_cases_expected7[] = // expected output for syntax_cases_buf7: the space after each "throw" is kept
+ "var x=document.getElementById(\"mynum\").value;try{if(x==\"\")"
+ "throw \"empty\";if(isNaN(x))throw \"not a number\";x=Number(x);if(x>10)"
+ "throw \"too high\";}catch(err){document.write(\"Input is \"+err);console.error(err);}"
+ "finally{document.write(\"</br />Done\");}";
+
+static const char syntax_cases_buf8[] = // input for SECTION("functions and promises")
+ "function sum (a, b) {\n"
+ "return new Promise(function (resolve, reject) {\n"
+ "setTimeout(function () {\n"
+ "if (typeof a !== \"number\" || typeof b !== \"number\") {\n"
+ "return reject(new TypeError(\"Inputs must be numbers\"));\n"
+ "}\n"
+ "resolve(a + b);\n"
+ "}, 1000);\n"
+ "});\n"
+ "}\n"
+ "\n"
+ "var myPromise = sum(10, 5);\n"
+ "myPromise.then(function (result) {\n"
+ "document.write(\" 10 + 5: \", result);\n"
+ "return sum(null, \"foo\");\n"
+ "}).then(function () {\n"
+ "}).catch(function (err) {\n"
+ "console.error(err);\n"
+ "});\n\r";
+
+static const char syntax_cases_expected8[] = // expected output for syntax_cases_buf8
+ "function sum(a,b){return new Promise(function(resolve,reject)"
+ "{setTimeout(function(){if(typeof a!==\"number\"||typeof b!==\"number\"){return "
+ "reject(new TypeError(\"Inputs must be numbers\"));}resolve(a+b);},1000);});}"
+ "var myPromise=sum(10,5);myPromise.then(function(result){"
+ "document.write(\" 10 + 5: \",result);return sum(null,\"foo\");}).then(function(){})"
+ ".catch(function(err){console.error(err);});";
+
+static const char syntax_cases_buf9[] = // input for SECTION("regex-division ambiguity"): '/' used both as division and as regex delimiter
+ "var a = Math.round( (new Date).getTime()/1E3 );\n"
+ "var b = a.match( /^[0-9a-z-_.]{10,1200}$/i );\n"
+ "var c = a.match( /=\\s*{((.|\\s)*?)};/g ) ;\n\r";
+
+static const char syntax_cases_expected9[] = // expected output for syntax_cases_buf9
+ "var a=Math.round((new Date).getTime()/1E3);"
+ "var b=a.match(/^[0-9a-z-_.]{10,1200}$/i);"
+ "var c=a.match(/=\\s*{((.|\\s)*?)};/g);";
+
+static const char syntax_cases_buf10[] = // input for SECTION("regex on a new line")
+ "var a = 2\n/ab -cd/";
+
+static const char syntax_cases_expected10[] = // expected output for syntax_cases_buf10: one space stands where the newline was
+ "var a=2 /ab -cd/";
+
+static const char syntax_cases_buf11[] = // input for SECTION("string and regex literals ambiguity with escaped sentinel chars")
+ "var d_str1 = \"\\\\ \" ; var d_str2 = \"abc\\\"def\" ;"
+ "var d_str3 = \"\\\"abc \" ;var s_str1 = '\\\\ ' ; var s_str2 = 'abc\\\'def' ; "
+ "var s_str3 = '\\\'abc ' ;var re_1 = /\\\\ / ; var re_2 = /abc\\/def/ ; "
+ "var re_3 = /\\/abc / ;";
+
+static const char syntax_cases_expected11[] = // expected output for syntax_cases_buf11: literal contents unchanged, outer spaces gone
+ "var d_str1=\"\\\\ \";var d_str2=\"abc\\\"def\";"
+ "var d_str3=\"\\\"abc \";var s_str1='\\\\ ';var s_str2='abc\\\'def';"
+ "var s_str3='\\\'abc ';var re_1=/\\\\ /;var re_2=/abc\\/def/;var re_3=/\\/abc /;";
+
+static const char syntax_cases_buf12[] = // input for SECTION("escaped LF and CR chars in literals"): backslash followed by LF or CR LF
+ "var str1 = \"abc\\\n def\" ;"
+ "var str2 = \"abc\\\r\n def\" ;"
+ "var str3 = 'abc\\\n def' ;"
+ "var str4 = 'abc\\\r\n def' ;";
+
+static const char syntax_cases_expected12[] = // expected output for syntax_cases_buf12: the backslash and line break are gone from each literal
+ "var str1=\"abc def\";"
+ "var str2=\"abc def\";"
+ "var str3='abc def';"
+ "var str4='abc def';";
+
+static const char syntax_cases_buf13[] = // input for SECTION("regex after keyword")
+ "return /regex/i.test( str ) ;";
+
+static const char syntax_cases_expected13[] = // expected output for syntax_cases_buf13: the space after "return" is kept
+ "return /regex/i.test(str);";
+
+static const char syntax_cases_buf14[] = // input for SECTION("white space between '+'<-->'++' and '-'<-->'--'")
+ "var a = b+ ++c ;\n"
+ "var a = b++ +c ;\n"
+ "var a = b++ + ++c ;\n"
+ "var a = b- --c ;\n"
+ "var a = b-- -c ;\n"
+ "var a = b-- - --c ;\n"
+ "var a = b++ - ++c ;\n"
+ "var a = b * -c ;\n"
+ "var a = b % -c ;\n"
+ "var a = b + -c ;";
+
+static const char syntax_cases_expected14[] = // expected output for syntax_cases_buf14: the space separating adjacent +/- operators is kept
+ "var a=b+ ++c;"
+ "var a=b++ +c;"
+ "var a=b++ + ++c;"
+ "var a=b- --c;"
+ "var a=b-- -c;"
+ "var a=b-- - --c;"
+ "var a=b++ - ++c;"
+ "var a=b* -c;"
+ "var a=b% -c;"
+ "var a=b+ -c;";
+
+static const char syntax_cases_buf15[] = // input for SECTION("LS and PS chars within literal"); \u2028 and \u2029 are C++ universal character names
+ "var str1 = 'abc\u2028 def' ;\n"
+ "var str2 = 'abc\u2029 def' ;\n\r";
+
+static const char syntax_cases_expected15[] = // expected output for syntax_cases_buf15: both characters stay inside the literals
+ "var str1='abc\u2028 def';"
+ "var str2='abc\u2029 def';";
+
+static const char syntax_cases_buf16[] = // input for SECTION("explicit LF within literal"): unescaped LF inside a double-quoted string
+ "var invalid_str = \"abc\n def\"";
+
+static const char syntax_cases_expected16[] = // expected output for syntax_cases_buf16
+ "var invalid_str=\"abc\"def \"";
+
+static const char syntax_cases_buf17[] = // input for SECTION("explicit CR within literal"): unescaped CR inside a single-quoted string
+ "var invalid_str = 'abc\r def'";
+
+static const char syntax_cases_expected17[] = // expected output for syntax_cases_buf17
+ "var invalid_str='abc'def '";
+
+static const char syntax_cases_buf18[] = // input for SECTION("escaped LF-CR sequence within literal"): backslash, LF, then CR
+ "var invalid_str = 'abc\\\n\r def'";
+
+static const char syntax_cases_expected18[] = // expected output for syntax_cases_buf18; same text as syntax_cases_expected17
+ "var invalid_str='abc'def '";
+
+static const char syntax_cases_buf19[] = // input for SECTION("escaped LF within regex literal")
+ "var invalid_re = /abc\\\n def/";
+
+static const char syntax_cases_expected19[] = // expected output for syntax_cases_buf19
+ "var invalid_re=/abc/def/";
+
+static const char syntax_cases_buf20[] = // input for SECTION("escaped CR-LF within regex literal")
+ "var invalid_re = /abc\\\r\n def/";
+
+static const char syntax_cases_expected20[] = // expected output for syntax_cases_buf20; same text as syntax_cases_expected19
+ "var invalid_re=/abc/def/";
+
+TEST_CASE("syntax cases", "[JSNormalizer]") // each SECTION passes syntax_cases_bufN and syntax_cases_expectedN to NORMALIZE, then to VALIDATE
+{
+ SECTION("variables")
+ {
+ NORMALIZE(syntax_cases_buf0, syntax_cases_expected0);
+ VALIDATE(syntax_cases_buf0, syntax_cases_expected0);
+ }
+ SECTION("operators")
+ {
+ NORMALIZE(syntax_cases_buf1, syntax_cases_expected1);
+ VALIDATE(syntax_cases_buf1, syntax_cases_expected1);
+ }
+ SECTION("arithmetic and logical operators")
+ {
+ NORMALIZE(syntax_cases_buf2, syntax_cases_expected2);
+ VALIDATE(syntax_cases_buf2, syntax_cases_expected2);
+ }
+ SECTION("complex object")
+ {
+ NORMALIZE(syntax_cases_buf3, syntax_cases_expected3);
+ VALIDATE(syntax_cases_buf3, syntax_cases_expected3);
+ }
+ SECTION("arrays")
+ {
+ NORMALIZE(syntax_cases_buf4, syntax_cases_expected4);
+ VALIDATE(syntax_cases_buf4, syntax_cases_expected4);
+ }
+ SECTION("loops")
+ {
+ NORMALIZE(syntax_cases_buf5, syntax_cases_expected5);
+ VALIDATE(syntax_cases_buf5, syntax_cases_expected5);
+ }
+ SECTION("if-else and switch statements")
+ {
+ NORMALIZE(syntax_cases_buf6, syntax_cases_expected6);
+ VALIDATE(syntax_cases_buf6, syntax_cases_expected6);
+ }
+ SECTION("try-catch statements")
+ {
+ NORMALIZE(syntax_cases_buf7, syntax_cases_expected7);
+ VALIDATE(syntax_cases_buf7, syntax_cases_expected7);
+ }
+ SECTION("functions and promises")
+ {
+ NORMALIZE(syntax_cases_buf8, syntax_cases_expected8);
+ VALIDATE(syntax_cases_buf8, syntax_cases_expected8);
+ }
+ SECTION("regex-division ambiguity")
+ {
+ NORMALIZE(syntax_cases_buf9, syntax_cases_expected9);
+ VALIDATE(syntax_cases_buf9, syntax_cases_expected9);
+ }
+ SECTION("regex on a new line")
+ {
+ NORMALIZE(syntax_cases_buf10, syntax_cases_expected10);
+ VALIDATE(syntax_cases_buf10, syntax_cases_expected10);
+ }
+ SECTION("string and regex literals ambiguity with escaped sentinel chars")
+ {
+ NORMALIZE(syntax_cases_buf11, syntax_cases_expected11);
+ VALIDATE(syntax_cases_buf11, syntax_cases_expected11);
+ }
+ SECTION("escaped LF and CR chars in literals")
+ {
+ NORMALIZE(syntax_cases_buf12, syntax_cases_expected12);
+ VALIDATE(syntax_cases_buf12, syntax_cases_expected12);
+ }
+ SECTION("regex after keyword")
+ {
+ NORMALIZE(syntax_cases_buf13, syntax_cases_expected13);
+ VALIDATE(syntax_cases_buf13, syntax_cases_expected13);
+ }
+ SECTION("white space between '+'<-->'++' and '-'<-->'--'")
+ {
+ NORMALIZE(syntax_cases_buf14, syntax_cases_expected14);
+ VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
+ }
+ SECTION("LS and PS chars within literal")
+ {
+ NORMALIZE(syntax_cases_buf15, syntax_cases_expected15);
+ VALIDATE(syntax_cases_buf15, syntax_cases_expected15);
+ }
+ SECTION("explicit LF within literal") // sections from here on use inputs named invalid_str / invalid_re
+ {
+ NORMALIZE(syntax_cases_buf16, syntax_cases_expected16);
+ VALIDATE(syntax_cases_buf16, syntax_cases_expected16);
+ }
+ SECTION("explicit CR within literal")
+ {
+ NORMALIZE(syntax_cases_buf17, syntax_cases_expected17);
+ VALIDATE(syntax_cases_buf17, syntax_cases_expected17);
+ }
+ SECTION("escaped LF-CR sequence within literal")
+ {
+ NORMALIZE(syntax_cases_buf18, syntax_cases_expected18);
+ VALIDATE(syntax_cases_buf18, syntax_cases_expected18);
+ }
+ SECTION("escaped LF within regex literal")
+ {
+ NORMALIZE(syntax_cases_buf19, syntax_cases_expected19);
+ VALIDATE(syntax_cases_buf19, syntax_cases_expected19);
+ }
+ SECTION("escaped CR-LF within regex literal")
+ {
+ NORMALIZE(syntax_cases_buf20, syntax_cases_expected20);
+ VALIDATE(syntax_cases_buf20, syntax_cases_expected20);
+ }
+}
+
+TEST_CASE("norm_depth is specified", "[JSNormalizer]")
+{
+    // A norm_depth of 7 is passed explicitly; the call is expected to
+    // return 0 and to report exactly the 7 bytes of "var abc" as copied.
+    const char input[] = "var abc = 123;\n\r";
+    const char want[] = "var abc";
+
+    char output[7];
+    const char* cursor = input;
+    int written;
+    int depth = 7;
+
+    const int rc = JSNormalizer::normalize(input, sizeof(input), output, sizeof(output),
+        &cursor, &written, depth);
+
+    CHECK(rc == 0);
+    CHECK(written == sizeof(want) - 1);
+    CHECK(memcmp(output, want, written) == 0);
+}
+
+TEST_CASE("tag script end is specified", "[JSNormalizer]")
+{
+    // The first two input lines are 12 bytes each, so the closing tag
+    // starts at offset 24. The test expects the returned pointer to sit
+    // at that offset and only the first two statements in the output.
+    const char input[] =
+        "var a = 1 ;\n"
+        "var b = 2 ;\n"
+        "</script>\n"
+        "var c = 3 ;\n";
+    const char want[] = "var a=1;var b=2;";
+    const int stop_offset = 24;
+
+    char output[sizeof(want)];
+    const char* cursor = input;
+    int written;
+    int depth = NORM_DEPTH;
+
+    const int rc = JSNormalizer::normalize(input, sizeof(input), output, sizeof(output),
+        &cursor, &written, depth);
+
+    CHECK(rc == 0);
+    CHECK(written == sizeof(want) - 1);
+    CHECK((cursor - input) == stop_offset);
+    CHECK(memcmp(output, want, written) == 0);
+}
+
+// Tests for JavaScript parsing errors and anomalies
+
+TEST_CASE("parsing errors", "[JSNormalizer]")
+{
+    SECTION("dstlen is too small")
+    {
+        // The 7-byte destination cannot hold the whole normalized input;
+        // the call is expected to return 1 with "var abc" copied.
+        const char input[] = "var abc = 123;\n\r";
+        const char want[] = "var abc";
+
+        char output[7];
+        const char* cursor = input;
+        int written;
+        int depth = NORM_DEPTH;
+
+        const int rc = JSNormalizer::normalize(input, sizeof(input), output, sizeof(output),
+            &cursor, &written, depth);
+
+        CHECK(rc == 1);
+        CHECK(written == sizeof(want) - 1);
+        CHECK(memcmp(output, want, written) == 0);
+    }
+}
+