]> git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: Add support for gettext-like functions with wide string argument.
author Bruno Haible <bruno@clisp.org>
Sat, 14 Nov 2020 17:55:17 +0000 (18:55 +0100)
committer Bruno Haible <bruno@clisp.org>
Sat, 14 Nov 2020 17:55:59 +0000 (18:55 +0100)
Reported by Érico Nogueira <ericonr@disroot.org> in
<https://lists.gnu.org/archive/html/bug-gettext/2020-11/msg00013.html>.

* gettext-tools/src/x-c.c (phase5_get): Recognize also the string-literal
prefixes 'u', 'U', 'L'. Consider all types of C++ raw strings as relevant.
* gettext-tools/tests/xgettext-c-5: Add more test cases.
* gettext-tools/tests/xgettext-c-c++-1: Likewise.
* NEWS: Mention it.

NEWS
gettext-tools/src/x-c.c
gettext-tools/tests/xgettext-c-5
gettext-tools/tests/xgettext-c-c++-1

diff --git a/NEWS b/NEWS
index 51958169517fe29f0044b4cb62c61f36321acbdc..22b08784d997bb0286ac9c39081b37d113f85378 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+Version 0.21.1 - December 2020
+
+* Programming languages support:
+  - C, C++: xgettext now supports gettext-like functions that take wide strings
+    (of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') as
+    arguments.
+
 Version 0.21 - July 2020
 
 * Programming languages support:
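diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c
index 0f19f966522ee8e74b1d977ef77186bb7c91e437..917bd7c88deb9cc2d19b975962dd3a1cd2014c4e 100644 (file)
--- a/gettext-tools/src/x-c.c
+++ b/gettext-tools/src/x-c.c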
@@ -1283,7 +1283,10 @@ phase5_get (token_ty *tp)
                  Note: The programmer who passes an UTF-8 encoded string to
                  gettext() or similar API functions will have to have called
                  bind_textdomain_codeset (DOMAIN, "UTF-8") first.  */
-              if (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8')
+              if ((bufpos == 1
+                   && (buffer[0] == 'u' || buffer[0] == 'U'
+                       || buffer[0] == 'L'))
+                  || (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8'))
                 goto string_literal;
               /* Recognize C++11 raw string literals.
                  See ISO C++ 11 section 2.14.5 [lex.string].
@@ -1302,8 +1305,14 @@ phase5_get (token_ty *tp)
                   && buffer[bufpos - 1] == 'R')
                 {
                   /* Only R and u8R raw strings can be used as gettext()
-                     arguments, for type reasons.  */
-                  const bool relevant = (bufpos != 2);
+                     arguments, for type reasons.  But the programmer may have
+                     defined
+                       - a c16gettext function that takes a 'const char16_t *'
+                         argument, or
+                       - a c32gettext function that takes a 'const char32_t *'
+                         argument, or
+                       - a wgettext function that takes a 'const wchar_t *'
+                         argument.  */
                   int starting_line_number = line_number;
                   bufpos = 0;
                   /* Start the buffer with a closing parenthesis.  This makes the
@@ -1369,10 +1378,8 @@ phase5_get (token_ty *tp)
                       int state;
 
                       /* Start accumulating the string.  */
-                      if (relevant)
-                        mixed_string_buffer_init (&msb, lc_string,
-                                                  logical_file_name,
-                                                  line_number);
+                      mixed_string_buffer_init (&msb, lc_string,
+                                                logical_file_name, line_number);
                       state = 0;
 
                       for (;;)
@@ -1380,8 +1387,7 @@ phase5_get (token_ty *tp)
                           c = phase3_getc ();
 
                           /* Keep line_number in sync.  */
-                          if (relevant)
-                            msb.line_number = line_number;
+                          msb.line_number = line_number;
 
                           if (c == EOF)
                             break;
@@ -1394,14 +1400,9 @@ phase5_get (token_ty *tp)
                               else /* state == bufpos && c == '"' */
                                 {
                                   /* Finished parsing the string.  */
-                                  if (relevant)
-                                    {
-                                      tp->type = token_type_string_literal;
-                                      tp->mixed_string = mixed_string_buffer_result (&msb);
-                                      tp->comment = add_reference (savable_comment);
-                                    }
-                                  else
-                                    tp->type = token_type_symbol;
+                                  tp->type = token_type_string_literal;
+                                  tp->mixed_string = mixed_string_buffer_result (&msb);
+                                  tp->comment = add_reference (savable_comment);
                                   return;
                                 }
                             }
@@ -1411,17 +1412,15 @@ phase5_get (token_ty *tp)
 
                               /* None of the bytes buffer[0]...buffer[state-1]
                                  can be ')'.  */
-                              if (relevant)
-                                for (i = 0; i < state; i++)
-                                  mixed_string_buffer_append_char (&msb, buffer[i]);
+                              for (i = 0; i < state; i++)
+                                mixed_string_buffer_append_char (&msb, buffer[i]);
 
                               /* But c may be ')'.  */
                               if (c == ')')
                                 state = 1;
                               else
                                 {
-                                  if (relevant)
-                                    mixed_string_buffer_append_char (&msb, c);
+                                  mixed_string_buffer_append_char (&msb, c);
                                   state = 0;
                                 }
                             }
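diff --git a/gettext-tools/tests/xgettext-c-5 b/gettext-tools/tests/xgettext-c-5
index 01e9a1908c5b9d18ca164acf23c4d6950bbb42ab..e03b08f0a7a8645d6a0ad980c6f86d534b2105eb 100755 (executable)
--- a/gettext-tools/tests/xgettext-c-5
+++ b/gettext-tools/tests/xgettext-c-5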
@@ -7,9 +7,16 @@
 
 cat <<\EOF > xg-c-5.c
 gettext("Choose a " u8"rosé wine à la carte");
+/* Some people may define a gettext-like function that takes a 'const wchar_t *' argument. */
+wgettext(L"Pulp " L"Fiction");
+/* Or a gettext-like function that takes a 'const char16_t *' argument. */
+c16gettext (u"Rain " u"Man");
+/* Or a gettext-like function that takes a 'const char32_t *' argument. */
+c32gettext (U"Jurassic " U"Park");
 EOF
 
 ${XGETTEXT} --from-code=ISO-8859-1 --no-location \
+            -kgettext -kwgettext -kc16gettext -kc32gettext \
             -o xg-c-5.tmp xg-c-5.c || Exit 1
 func_filter_POT_Creation_Date xg-c-5.tmp xg-c-5.po
 
@@ -34,6 +41,15 @@ msgstr ""
 
 msgid "Choose a rosé wine à la carte"
 msgstr ""
+
+msgid "Pulp Fiction"
+msgstr ""
+
+msgid "Rain Man"
+msgstr ""
+
+msgid "Jurassic Park"
+msgstr ""
 EOF
 
 : ${DIFF=diff}
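diff --git a/gettext-tools/tests/xgettext-c-c++-1 b/gettext-tools/tests/xgettext-c-c++-1
index 9d0180b146d209c29201b6d975589f574b4ff378..09276c556608d7f9306f5927fa00139a44f5ce47 100755 (executable)
--- a/gettext-tools/tests/xgettext-c-c++-1
+++ b/gettext-tools/tests/xgettext-c-c++-1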
@@ -45,6 +45,12 @@ gettext (uX"This (is NOT a raw string");
 
 gettext (u8"This is a UTF-8 string");
 
+wgettext (L"This is a wide string");
+
+c16gettext (u"This is a 16-bit wide char string");
+
+c32gettext (U"This is a 32-bit wide char string");
+
 u88"This is not a UTF-8 string";
 
 u"This is a UTF-16 string";
@@ -79,6 +85,7 @@ gettext(\"abc\");
 EOF
 
 ${XGETTEXT} --add-comments --no-location --no-wrap \
+            -kgettext -kwgettext -kc16gettext -kc32gettext \
             -o xg-c-c++-1.tmp xg-c-c++-1.cc || Exit 1
 func_filter_POT_Creation_Date xg-c-c++-1.tmp xg-c-c++-1.po
 
@@ -110,6 +117,15 @@ msgstr ""
 msgid "This is a UTF-8 string"
 msgstr ""
 
+msgid "This is a wide string"
+msgstr ""
+
+msgid "This is a 16-bit wide char string"
+msgstr ""
+
+msgid "This is a 32-bit wide char string"
+msgstr ""
+
 msgid ""
 "\n"
 "This is a raw string\n"