git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: Scheme: Recognize hexadecimal escape sequences in string literals.
author: Bruno Haible <bruno@clisp.org>
Fri, 16 Aug 2024 23:29:02 +0000 (01:29 +0200)
committer: Bruno Haible <bruno@clisp.org>
Fri, 16 Aug 2024 23:29:02 +0000 (01:29 +0200)
* gettext-tools/src/x-scheme.c (accumulate_escaped): Recognize \x, \u, \U escape
sequences.
* gettext-tools/tests/xgettext-scheme-7: Test also some \x escape sequences.
* gettext-tools/tests/xgettext-scheme-8: Test also some \x, \u, \U escape
sequences.

gettext-tools/src/x-scheme.c
gettext-tools/tests/xgettext-scheme-7
gettext-tools/tests/xgettext-scheme-8

index e6c5a8793a3d03e79c7a56941ab7b88a6e18eb1f..2d7b3a1460509065ed714912ae06f54c640e7727 100644 (file)
@@ -692,6 +692,166 @@ accumulate_escaped (struct mixed_string_buffer *literal)
                 }
               continue;
             }
+          if (follow_guile && (c == 'x' || c == 'u' || c == 'U'))
+            {
+              /* In Guile, \x must be followed by exactly 2 hexadecimal digits,
+                 \u must be followed by exactly 4 hexadecimal digits, and
+                 \U must be followed by exactly 6 hexadecimal digits, producing
+                 a value < 0x110000.  See
+                 <https://www.gnu.org/software/guile/manual/html_node/String-Syntax.html>.  */
+              int first = c;
+              unsigned int count = (c == 'x' ? 2 : c == 'u' ? 4 : 6);
+              unsigned int n_limit =
+                (c == 'x' ? 0x100 : c == 'u' ? 0x10000 : 0x110000);
+              c = phase1_getc ();
+              switch (c)
+                {
+                default:
+                  phase1_ungetc (c);
+                  phase1_ungetc (first);
+                  /* Invalid input.  Be tolerant, no error message.  */
+                  mixed_string_buffer_append_char (literal, '\\');
+                  continue;
+
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                  break;
+                }
+              {
+                unsigned int n;
+                bool overflow;
+
+                n = 0;
+                overflow = false;
+
+                for (;;)
+                  {
+                    switch (c)
+                      {
+                      default:
+                        phase1_ungetc (c);
+                        goto guile_hex_escape_done;
+
+                      case '0': case '1': case '2': case '3': case '4':
+                      case '5': case '6': case '7': case '8': case '9':
+                        if (n < n_limit / 16)
+                          n = n * 16 + c - '0';
+                        else
+                          overflow = true;
+                        break;
+
+                      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                        if (n < n_limit / 16)
+                          n = n * 16 + 10 + c - 'A';
+                        else
+                          overflow = true;
+                        break;
+
+                      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                        if (n < n_limit / 16)
+                          n = n * 16 + 10 + c - 'a';
+                        else
+                          overflow = true;
+                        break;
+                      }
+                    if (--count == 0)
+                      goto guile_hex_escape_done;
+                    c = phase1_getc ();
+                  }
+
+               guile_hex_escape_done:
+                if (count > 0)
+                  {
+                    if_error (IF_SEVERITY_WARNING,
+                              logical_file_name, line_number, (size_t)(-1), false,
+                              _("hexadecimal escape sequence with too few digits"));
+                    n = 0xFFFD;
+                  }
+                else if (overflow)
+                  {
+                    if_error (IF_SEVERITY_WARNING,
+                              logical_file_name, line_number, (size_t)(-1), false,
+                              _("hexadecimal escape sequence out of range"));
+                    n = 0xFFFD;
+                  }
+                mixed_string_buffer_append_unicode (literal, n);
+              }
+              continue;
+            }
+          if (!follow_guile && c == 'x')
+            {
+              /* In R6RS mode, \x must be followed by one or more hexadecimal
+                 digits and then a semicolon.  See
+                 R6RS § 4.2.7, R7RS § 6.7 and § 7.1.1.  */
+              unsigned int const n_limit = 0x110000;
+              unsigned int count;
+              unsigned int n;
+              bool overflow;
+
+              count = 0;
+              n = 0;
+              overflow = false;
+
+              for (;;)
+                {
+                  c = phase1_getc ();
+                  switch (c)
+                    {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                      if (n < n_limit / 16)
+                        n = n * 16 + c - '0';
+                      else
+                        overflow = true;
+                      break;
+
+                    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                      if (n < n_limit / 16)
+                        n = n * 16 + 10 + c - 'A';
+                      else
+                        overflow = true;
+                      break;
+
+                    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                      if (n < n_limit / 16)
+                        n = n * 16 + 10 + c - 'a';
+                      else
+                        overflow = true;
+                      break;
+
+                    case ';':
+                      if (count == 0)
+                        {
+                          if_error (IF_SEVERITY_WARNING,
+                                    logical_file_name, line_number, (size_t)(-1), false,
+                                    _("hexadecimal escape sequence with no digits"));
+                          n = 0xFFFD;
+                        }
+                      else if (overflow)
+                        {
+                          if_error (IF_SEVERITY_WARNING,
+                                    logical_file_name, line_number, (size_t)(-1), false,
+                                    _("hexadecimal escape sequence out of range"));
+                          n = 0xFFFD;
+                        }
+                      goto r6rs_hex_escape_done;
+
+                    default:
+                      if_error (IF_SEVERITY_WARNING,
+                                logical_file_name, line_number, (size_t)(-1), false,
+                                _("hexadecimal escape sequence not terminated with a semicolon"));
+                      n = '\\';
+                      goto r6rs_hex_escape_done;
+                    }
+                  count++;
+                }
+
+             r6rs_hex_escape_done:
+              mixed_string_buffer_append_unicode (literal, n);
+              continue;
+            }
           switch (c)
             {
             case '0':
index bc0f26377b84d8c3f039b89534d7e0876812fbb9..f066fe5603fd6f7ced70845becb0ddd74ca4f14d 100755 (executable)
@@ -8,12 +8,34 @@ cat <<\EOF > xg-sc-7.scm
                    drobwllllantysiliogogogoch"))
 (display (gettext "The hotel has a  \
                    pretty garden."))
+(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno"))
 EOF
 
 : ${XGETTEXT=xgettext}
-${XGETTEXT} -L Scheme --omit-header -d xg-sc-7 xg-sc-7.scm || Exit 1
+${XGETTEXT} -L Scheme -o xg-sc-7.tmp xg-sc-7.scm || Exit 1
+LC_ALL=C tr -d '\r' < xg-sc-7.tmp > xg-sc-7.po || Exit 1
+
+sed -e '/POT-Creation-Date/d' < xg-sc-7.po > xg-sc-7.pot
 
 cat <<\EOF > xg-sc-7.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
 #: xg-sc-7.scm:1
 msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
 msgstr ""
@@ -21,10 +43,16 @@ msgstr ""
 #: xg-sc-7.scm:3
 msgid "The hotel has a  pretty garden."
 msgstr ""
+
+#: xg-sc-7.scm:5
+msgid ""
+"abc\n"
+"defλghi€jkl😃mno"
+msgstr ""
 EOF
 
 : ${DIFF=diff}
-${DIFF} xg-sc-7.ok xg-sc-7.po
+${DIFF} xg-sc-7.ok xg-sc-7.pot
 result=$?
 
 exit $result
index 94dbbfdac28a331602fbf9bb2a0801bf11ba521c..0e3191c831f3800c5c71492501f86cc139903cb8 100755 (executable)
@@ -8,17 +8,41 @@ cat <<\EOF > xg-sc-8.scm
                    drobwllllantysiliogogogoch"))
 (display (gettext "The hotel has a  \
                    pretty garden."))
+(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno"))
+(display (gettext "Abc\x0adef\u03BBghi\u20ACjkl\U01F603mno"))
 #!r6rs
 (display (gettext "Llanfairpwllgwyngyllgogerychwyrn\
                    drobwllllantysiliogogogoch"))
 (display (gettext "The hotel has a  \
                    pretty garden."))
+(display (gettext "abc\xa;def\x3BB;ghi\x20AC;jkl\x0001F603;mno"))
 EOF
 
 : ${XGETTEXT=xgettext}
-${XGETTEXT} -L Guile --omit-header -d xg-sc-8 xg-sc-8.scm || Exit 1
+${XGETTEXT} -L Guile -o xg-sc-8.tmp xg-sc-8.scm || Exit 1
+LC_ALL=C tr -d '\r' < xg-sc-8.tmp > xg-sc-8.po || Exit 1
+
+sed -e '/POT-Creation-Date/d' < xg-sc-8.po > xg-sc-8.pot
 
 cat <<\EOF > xg-sc-8.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
 #: xg-sc-8.scm:1
 msgid ""
 "Llanfairpwllgwyngyllgogerychwyrn                   drobwllllantysiliogogogoch"
@@ -28,17 +52,33 @@ msgstr ""
 msgid "The hotel has a                     pretty garden."
 msgstr ""
 
+#: xg-sc-8.scm:5
+msgid "abc�;def;B;ghi AC;jkl"
+msgstr ""
+
 #: xg-sc-8.scm:6
-msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
+msgid ""
+"Abc\n"
+"defλghi€jkl😃mno"
 msgstr ""
 
 #: xg-sc-8.scm:8
+msgid "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
+msgstr ""
+
+#: xg-sc-8.scm:10
 msgid "The hotel has a  pretty garden."
 msgstr ""
+
+#: xg-sc-8.scm:12
+msgid ""
+"abc\n"
+"defλghi€jkl😃mno"
+msgstr ""
 EOF
 
 : ${DIFF=diff}
-${DIFF} xg-sc-8.ok xg-sc-8.po
+${DIFF} xg-sc-8.ok xg-sc-8.pot
 result=$?
 
 exit $result