From: Bruno Haible Date: Tue, 20 May 2025 17:35:20 +0000 (+0200) Subject: xgettext: JavaScript: Parse RegExp literals according to ECMA-262 15th edition. X-Git-Tag: v0.26~147 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2fb3d868b278e4014cef10c1d97ec1f85806e530;p=thirdparty%2Fgettext.git xgettext: JavaScript: Parse RegExp literals according to ECMA-262 15th edition. Reported by Christian Wasserthal. * gettext-tools/src/x-javascript.c (phase5_scan_regexp): Report an error when end-of-line is reached. Parse character classes. Recognize more modifier flags. * gettext-tools/tests/xgettext-javascript-2: Add two more test cases. * NEWS: Mention the change. --- diff --git a/NEWS b/NEWS index d2a87986d..d44410482 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Version 0.26 - July 2025 + +# Programming languages support: + * JavaScript: + - xgettext now parses regular expressions with character classes + correctly. + Version 0.25 - May 2025 # Programming languages support: diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c index d2b17335c..f77e01f01 100644 --- a/gettext-tools/src/x-javascript.c +++ b/gettext-tools/src/x-javascript.c @@ -1075,37 +1075,79 @@ is_after_expression (void) } } +/* Scans a regular expression literal. + The leading '/' has already been consumed. + See ECMA-262 15th edition sections + - § 12.9.5 Regular Expression Literals + - § 22.2.3.3 RegExpInitialize */ static void phase5_scan_regexp (void) { + bool at_start; int c; /* Scan for end of RegExp literal ('/'). */ - for (;;) + for (at_start = true; ; at_start = false) { /* Must use phase2 as there can't be comments. 
*/ c = phase2_getc (); + if (c == UEOF || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029) + goto unterminated; + if (at_start && c == '*') + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("invalid RegExp literal")); + return; + } if (c == '/') break; if (c == '\\') { c = phase2_getc (); - if (c != UEOF) - continue; + if (c == UEOF || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029) + goto unterminated; } - if (c == UEOF) + else if (c == '[') { - if_error (IF_SEVERITY_WARNING, - logical_file_name, line_number, (size_t)(-1), false, - _("RegExp literal terminated too early")); - return; + for (;;) + { + c = phase2_getc (); + if (c == UEOF + || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029) + goto unterminated_in_class; + if (c == ']') + break; + if (c == '\\') + { + c = phase2_getc (); + if (c == UEOF + || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029) + goto unterminated_in_class; + } + } } } - /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1). */ + /* Scan for modifier flags (ECMA-262 15th edition § 22.2.3.3). 
*/ c = phase2_getc (); - if (!(c == 'g' || c == 'i' || c == 'm')) + if (!(c == 'd' || c == 'g' || c == 'i' || c == 'm' || c == 's' + || c == 'u' || c == 'v' || c == 'y')) phase2_ungetc (c); + + return; + + unterminated: + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("RegExp literal terminated too early")); + return; + + unterminated_in_class: + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("RegExp literal lacks a ']' to match the '['")); + return; } /* Various syntactic constructs can be nested: diff --git a/gettext-tools/tests/xgettext-javascript-2 b/gettext-tools/tests/xgettext-javascript-2 index d3d642814..806c74642 100755 --- a/gettext-tools/tests/xgettext-javascript-2 +++ b/gettext-tools/tests/xgettext-javascript-2 @@ -28,6 +28,8 @@ else var s7 = /a\/\f\r\n\t\v\0\b\s\S\w\W\d\D\b\Bb/.test(_("RegExp test string #14")); var s8 = /(?=(a+))a*b\1/.exec(_("RegExp test string #15")); var s9 = /_("a+")/.exec(_("RegExp test string #16")); +var s10 = /[abc/def]/.exec(_("RegExp test string #17")); +var s11 = /[abc/`def]/.exec(_("RegExp test string #18")); EOF : ${XGETTEXT=xgettext} @@ -101,6 +103,12 @@ msgstr "" msgid "RegExp test string #16" msgstr "" + +msgid "RegExp test string #17" +msgstr "" + +msgid "RegExp test string #18" +msgstr "" EOF : ${DIFF=diff}