From: Bruno Haible Date: Fri, 16 Aug 2024 23:29:02 +0000 (+0200) Subject: xgettext: Scheme: Recognize hexadecimal escape sequences in string literals. X-Git-Tag: v0.23~157 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8054e9260130fe74457bb0498432ba43f0e6e692;p=thirdparty%2Fgettext.git xgettext: Scheme: Recognize hexadecimal escape sequences in string literals. * gettext-tools/src/x-scheme.c (accumulate_escaped): Recognize \x, \u, \U escape sequences. * gettext-tools/tests/xgettext-scheme-7: Test also some \x escape sequences. * gettext-tools/tests/xgettext-scheme-8: Test also some \x, \u, \U escape sequences. --- diff --git a/gettext-tools/src/x-scheme.c b/gettext-tools/src/x-scheme.c index e6c5a8793..2d7b3a146 100644 --- a/gettext-tools/src/x-scheme.c +++ b/gettext-tools/src/x-scheme.c @@ -692,6 +692,166 @@ accumulate_escaped (struct mixed_string_buffer *literal) } continue; } + if (follow_guile && (c == 'x' || c == 'u' || c == 'U')) + { + /* In Guile, \x must be followed by exactly 2 hexadecimal digits, + \u must be followed by exactly 4 hexadecimal digits, and + \U must be followed by exactly 6 hexadecimal digits, producing + a value < 0x110000. See + <https://www.gnu.org/software/guile/manual/html_node/String-Syntax.html>. */ + int first = c; + unsigned int count = (c == 'x' ? 2 : c == 'u' ? 4 : 6); + unsigned int n_limit = + (c == 'x' ? 0x100 : c == 'u' ? 0x10000 : 0x110000); + c = phase1_getc (); + switch (c) + { + default: + phase1_ungetc (c); + phase1_ungetc (first); + /* Invalid input. Be tolerant, no error message. 
*/ + mixed_string_buffer_append_char (literal, '\\'); + continue; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + break; + } + { + unsigned int n; + bool overflow; + + n = 0; + overflow = false; + + for (;;) + { + switch (c) + { + default: + phase1_ungetc (c); + goto guile_hex_escape_done; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (n < n_limit / 16) + n = n * 16 + c - '0'; + else + overflow = true; + break; + + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + if (n < n_limit / 16) + n = n * 16 + 10 + c - 'A'; + else + overflow = true; + break; + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + if (n < n_limit / 16) + n = n * 16 + 10 + c - 'a'; + else + overflow = true; + break; + } + if (--count == 0) + goto guile_hex_escape_done; + c = phase1_getc (); + } + + guile_hex_escape_done: + if (count > 0) + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("hexadecimal escape sequence with too few digits")); + n = 0xFFFD; + } + else if (overflow) + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("hexadecimal escape sequence out of range")); + n = 0xFFFD; + } + mixed_string_buffer_append_unicode (literal, n); + } + continue; + } + if (!follow_guile && c == 'x') + { + /* In R6RS mode, \x must be followed by one or more hexadecimal + digits and then a semicolon. See + R6RS § 4.2.7, R7RS § 6.7 and § 7.1.1. 
*/ + unsigned int const n_limit = 0x110000; + unsigned int count; + unsigned int n; + bool overflow; + + count = 0; + n = 0; + overflow = false; + + for (;;) + { + c = phase1_getc (); + switch (c) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (n < n_limit / 16) + n = n * 16 + c - '0'; + else + overflow = true; + break; + + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + if (n < n_limit / 16) + n = n * 16 + 10 + c - 'A'; + else + overflow = true; + break; + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + if (n < n_limit / 16) + n = n * 16 + 10 + c - 'a'; + else + overflow = true; + break; + + case ';': + if (count == 0) + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("hexadecimal escape sequence with no digits")); + n = 0xFFFD; + } + else if (overflow) + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("hexadecimal escape sequence out of range")); + n = 0xFFFD; + } + goto r6rs_hex_escape_done; + + default: + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("hexadecimal escape sequence not terminated with a semicolon")); + n = '\\'; + goto r6rs_hex_escape_done; + } + count++; + } + + r6rs_hex_escape_done: + mixed_string_buffer_append_unicode (literal, n); + continue; + } switch (c) { case '0': diff --git a/gettext-tools/tests/xgettext-scheme-7 b/gettext-tools/tests/xgettext-scheme-7 index bc0f26377..f066fe560 100755 --- a/gettext-tools/tests/xgettext-scheme-7 +++ b/gettext-tools/tests/xgettext-scheme-7 @@ -8,12 +8,34 @@ cat <<\EOF > xg-sc-7.scm drobwllllantysiliogogogoch")) (display (gettext "The hotel has a \ pretty garden.")) +(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno")) EOF : ${XGETTEXT=xgettext} -${XGETTEXT} -L Scheme --omit-header -d xg-sc-7 xg-sc-7.scm || Exit 1 +${XGETTEXT} -L Scheme -o xg-sc-7.tmp xg-sc-7.scm || 
Exit 1 +LC_ALL=C tr -d '\r' < xg-sc-7.tmp > xg-sc-7.po || Exit 1 + +sed -e '/POT-Creation-Date/d' < xg-sc-7.po > xg-sc-7.pot cat <<\EOF > xg-sc-7.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + #: xg-sc-7.scm:1 msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch" msgstr "" @@ -21,10 +43,16 @@ msgstr "" #: xg-sc-7.scm:3 msgid "The hotel has a pretty garden." msgstr "" + +#: xg-sc-7.scm:5 +msgid "" +"abc\n" +"defλghi€jkl😃mno" +msgstr "" EOF : ${DIFF=diff} -${DIFF} xg-sc-7.ok xg-sc-7.po +${DIFF} xg-sc-7.ok xg-sc-7.pot result=$? 
exit $result diff --git a/gettext-tools/tests/xgettext-scheme-8 b/gettext-tools/tests/xgettext-scheme-8 index 94dbbfdac..0e3191c83 100755 --- a/gettext-tools/tests/xgettext-scheme-8 +++ b/gettext-tools/tests/xgettext-scheme-8 @@ -8,17 +8,41 @@ cat <<\EOF > xg-sc-8.scm drobwllllantysiliogogogoch")) (display (gettext "The hotel has a \ pretty garden.")) +(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno")) +(display (gettext "Abc\x0adef\u03BBghi\u20ACjkl\U01F603mno")) #!r6rs (display (gettext "Llanfairpwllgwyngyllgogerychwyrn\ drobwllllantysiliogogogoch")) (display (gettext "The hotel has a \ pretty garden.")) +(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno")) EOF : ${XGETTEXT=xgettext} -${XGETTEXT} -L Guile --omit-header -d xg-sc-8 xg-sc-8.scm || Exit 1 +${XGETTEXT} -L Guile -o xg-sc-8.tmp xg-sc-8.scm || Exit 1 +LC_ALL=C tr -d '\r' < xg-sc-8.tmp > xg-sc-8.po || Exit 1 + +sed -e '/POT-Creation-Date/d' < xg-sc-8.po > xg-sc-8.pot cat <<\EOF > xg-sc-8.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + #: xg-sc-8.scm:1 msgid "" "Llanfairpwllgwyngyllgogerychwyrn drobwllllantysiliogogogoch" @@ -28,17 +52,33 @@ msgstr "" msgid "The hotel has a pretty garden." 
msgstr "" +#: xg-sc-8.scm:5 +msgid "abc�;def;B;ghi AC;jkl" +msgstr "" + #: xg-sc-8.scm:6 -msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch" +msgid "" +"Abc\n" +"defλghi€jkl😃mno" msgstr "" #: xg-sc-8.scm:8 +msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch" +msgstr "" + +#: xg-sc-8.scm:10 msgid "The hotel has a pretty garden." msgstr "" + +#: xg-sc-8.scm:12 +msgid "" +"abc\n" +"defλghi€jkl😃mno" +msgstr "" EOF : ${DIFF=diff} -${DIFF} xg-sc-8.ok xg-sc-8.po +${DIFF} xg-sc-8.ok xg-sc-8.pot result=$? exit $result