xgettext: Add support for gettext-like functions with wide string argument.

author Bruno Haible <bruno@clisp.org>
Sat, 14 Nov 2020 17:55:17 +0000 (18:55 +0100)
committer Bruno Haible <bruno@clisp.org>
Sat, 14 Nov 2020 17:55:59 +0000 (18:55 +0100)
diff --git a/NEWS b/NEWS

index 51958169517fe29f0044b4cb62c61f36321acbdc..22b08784d997bb0286ac9c39081b37d113f85378 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+Version 0.21.1 - December 2020
+
+* Programming languages support:
+  - C, C++: xgettext now supports gettext-like functions that take wide strings
+    (of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') as
+    arguments.
+
  Version 0.21 - July 2020
  
  * Programming languages support:
diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c

index 0f19f966522ee8e74b1d977ef77186bb7c91e437..917bd7c88deb9cc2d19b975962dd3a1cd2014c4e 100644 (file)
--- a/gettext-tools/src/x-c.c
+++ b/gettext-tools/src/x-c.c
@@ -1283,7 +1283,10 @@ phase5_get (token_ty *tp)
                   Note: The programmer who passes an UTF-8 encoded string to
                   gettext() or similar API functions will have to have called
                   bind_textdomain_codeset (DOMAIN, "UTF-8") first.  */
-              if (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8')
+              if ((bufpos == 1
+                   && (buffer[0] == 'u' || buffer[0] == 'U'
+                       || buffer[0] == 'L'))
+                  || (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8'))
                  goto string_literal;
                /* Recognize C++11 raw string literals.
                   See ISO C++ 11 section 2.14.5 [lex.string].
@@ -1302,8 +1305,14 @@ phase5_get (token_ty *tp)
                    && buffer[bufpos - 1] == 'R')
                  {
                    /* Only R and u8R raw strings can be used as gettext()
-                     arguments, for type reasons.  */
-                  const bool relevant = (bufpos != 2);
+                     arguments, for type reasons.  But the programmer may have
+                     defined
+                       - a c16gettext function that takes a 'const char16_t *'
+                         argument, or
+                       - a c32gettext function that takes a 'const char32_t *'
+                         argument, or
+                       - a wgettext function that takes a 'const wchar_t *'
+                         argument.  */
                    int starting_line_number = line_number;
                    bufpos = 0;
                    /* Start the buffer with a closing parenthesis.  This makes the
@@ -1369,10 +1378,8 @@ phase5_get (token_ty *tp)
                        int state;
  
                        /* Start accumulating the string.  */
-                      if (relevant)
-                        mixed_string_buffer_init (&msb, lc_string,
-                                                  logical_file_name,
-                                                  line_number);
+                      mixed_string_buffer_init (&msb, lc_string,
+                                                logical_file_name, line_number);
                        state = 0;
  
                        for (;;)
@@ -1380,8 +1387,7 @@ phase5_get (token_ty *tp)
                            c = phase3_getc ();
  
                            /* Keep line_number in sync.  */
-                          if (relevant)
-                            msb.line_number = line_number;
+                          msb.line_number = line_number;
  
                            if (c == EOF)
                              break;
@@ -1394,14 +1400,9 @@ phase5_get (token_ty *tp)
                                else /* state == bufpos && c == '"' */
                                  {
                                    /* Finished parsing the string.  */
-                                  if (relevant)
-                                    {
-                                      tp->type = token_type_string_literal;
-                                      tp->mixed_string = mixed_string_buffer_result (&msb);
-                                      tp->comment = add_reference (savable_comment);
-                                    }
-                                  else
-                                    tp->type = token_type_symbol;
+                                  tp->type = token_type_string_literal;
+                                  tp->mixed_string = mixed_string_buffer_result (&msb);
+                                  tp->comment = add_reference (savable_comment);
                                    return;
                                  }
                              }
@@ -1411,17 +1412,15 @@ phase5_get (token_ty *tp)
  
                                /* None of the bytes buffer[0]...buffer[state-1]
                                   can be ')'.  */
-                              if (relevant)
-                                for (i = 0; i < state; i++)
-                                  mixed_string_buffer_append_char (&msb, buffer[i]);
+                              for (i = 0; i < state; i++)
+                                mixed_string_buffer_append_char (&msb, buffer[i]);
  
                                /* But c may be ')'.  */
                                if (c == ')')
                                  state = 1;
                                else
                                  {
-                                  if (relevant)
-                                    mixed_string_buffer_append_char (&msb, c);
+                                  mixed_string_buffer_append_char (&msb, c);
                                    state = 0;
                                  }
                              }
diff --git a/gettext-tools/tests/xgettext-c-5 b/gettext-tools/tests/xgettext-c-5

index 01e9a1908c5b9d18ca164acf23c4d6950bbb42ab..e03b08f0a7a8645d6a0ad980c6f86d534b2105eb 100755 (executable)
--- a/gettext-tools/tests/xgettext-c-5
+++ b/gettext-tools/tests/xgettext-c-5
@@ -7,9 +7,16 @@
  
  cat <<\EOF > xg-c-5.c
  gettext("Choose a " u8"rosé wine à la carte");
+/* Some people may define a gettext-like function that takes a 'const wchar_t *' argument. */
+wgettext(L"Pulp " L"Fiction");
+/* Or a gettext-like function that takes a 'const char16_t *' argument. */
+c16gettext (u"Rain " u"Man");
+/* Or a gettext-like function that takes a 'const char32_t *' argument. */
+c32gettext (U"Jurassic " U"Park");
  EOF
  
  ${XGETTEXT} --from-code=ISO-8859-1 --no-location \
+            -kgettext -kwgettext -kc16gettext -kc32gettext \
              -o xg-c-5.tmp xg-c-5.c || Exit 1
  func_filter_POT_Creation_Date xg-c-5.tmp xg-c-5.po
  
@@ -34,6 +41,15 @@ msgstr ""
  
  msgid "Choose a rosé wine à la carte"
  msgstr ""
+
+msgid "Pulp Fiction"
+msgstr ""
+
+msgid "Rain Man"
+msgstr ""
+
+msgid "Jurassic Park"
+msgstr ""
  EOF
  
  : ${DIFF=diff}
diff --git a/gettext-tools/tests/xgettext-c-c++-1 b/gettext-tools/tests/xgettext-c-c++-1

index 9d0180b146d209c29201b6d975589f574b4ff378..09276c556608d7f9306f5927fa00139a44f5ce47 100755 (executable)
--- a/gettext-tools/tests/xgettext-c-c++-1
+++ b/gettext-tools/tests/xgettext-c-c++-1
@@ -45,6 +45,12 @@ gettext (uX"This (is NOT a raw string");
  
  gettext (u8"This is a UTF-8 string");
  
+wgettext (L"This is a wide string");
+
+c16gettext (u"This is a 16-bit wide char string");
+
+c32gettext (U"This is a 32-bit wide char string");
+
  u88"This is not a UTF-8 string";
  
  u"This is a UTF-16 string";
@@ -79,6 +85,7 @@ gettext(\"abc\");
  EOF
  
  ${XGETTEXT} --add-comments --no-location --no-wrap \
+            -kgettext -kwgettext -kc16gettext -kc32gettext \
              -o xg-c-c++-1.tmp xg-c-c++-1.cc || Exit 1
  func_filter_POT_Creation_Date xg-c-c++-1.tmp xg-c-c++-1.po
  
@@ -110,6 +117,15 @@ msgstr ""
  msgid "This is a UTF-8 string"
  msgstr ""
  
+msgid "This is a wide string"
+msgstr ""
+
+msgid "This is a 16-bit wide char string"
+msgstr ""
+
+msgid "This is a 32-bit wide char string"
+msgstr ""
+
  msgid ""
  "\n"
  "This is a raw string\n"
author	Bruno Haible <bruno@clisp.org>
	Sat, 14 Nov 2020 17:55:17 +0000 (18:55 +0100)
committer	Bruno Haible <bruno@clisp.org>
	Sat, 14 Nov 2020 17:55:59 +0000 (18:55 +0100)
NEWS		patch | blob | blame | history
gettext-tools/src/x-c.c		patch | blob | blame | history
gettext-tools/tests/xgettext-c-5		patch | blob | blame | history
gettext-tools/tests/xgettext-c-c++-1		patch | blob | blame | history