From: Bruno Haible Date: Sat, 14 Nov 2020 17:55:17 +0000 (+0100) Subject: xgettext: Add support for gettext-like functions with wide string argument. X-Git-Tag: v0.22~297 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1a5081fda1e4dfefcf727df7aa812f0b92f7977b;p=thirdparty%2Fgettext.git xgettext: Add support for gettext-like functions with wide string argument. Reported by Érico Nogueira in . * gettext-tools/src/x-c.c (phase5_get): Recognize also the string-literal prefixes 'u', 'U', 'L'. Consider all types of C++ raw strings as relevant. * gettext-tools/tests/xgettext-c-5: Add more test cases. * gettext-tools/tests/xgettext-c-c++-1: Likewise. * NEWS: Mention it. --- diff --git a/NEWS b/NEWS index 519581695..22b08784d 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Version 0.21.1 - December 2020 + +* Programming languages support: + - C, C++: xgettext now supports gettext-like functions that take wide strings + (of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') as + arguments. + Version 0.21 - July 2020 * Programming languages support: diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c index 0f19f9665..917bd7c88 100644 --- a/gettext-tools/src/x-c.c +++ b/gettext-tools/src/x-c.c @@ -1283,7 +1283,10 @@ phase5_get (token_ty *tp) Note: The programmer who passes an UTF-8 encoded string to gettext() or similar API functions will have to have called bind_textdomain_codeset (DOMAIN, "UTF-8") first. */ - if (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8') + if ((bufpos == 1 + && (buffer[0] == 'u' || buffer[0] == 'U' + || buffer[0] == 'L')) + || (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8')) goto string_literal; /* Recognize C++11 raw string literals. See ISO C++ 11 section 2.14.5 [lex.string]. @@ -1302,8 +1305,14 @@ phase5_get (token_ty *tp) && buffer[bufpos - 1] == 'R') { /* Only R and u8R raw strings can be used as gettext() - arguments, for type reasons. */ - const bool relevant = (bufpos != 2); + arguments, for type reasons. But the programmer may have + defined + - a c16gettext function that takes a 'const char16_t *' + argument, or + - a c32gettext function that takes a 'const char32_t *' + argument, or + - a wgettext function that takes a 'const wchar_t *' + argument. */ int starting_line_number = line_number; bufpos = 0; /* Start the buffer with a closing parenthesis. This makes the @@ -1369,10 +1378,8 @@ phase5_get (token_ty *tp) int state; /* Start accumulating the string. */ - if (relevant) - mixed_string_buffer_init (&msb, lc_string, - logical_file_name, - line_number); + mixed_string_buffer_init (&msb, lc_string, + logical_file_name, line_number); state = 0; for (;;) @@ -1380,8 +1387,7 @@ phase5_get (token_ty *tp) c = phase3_getc (); /* Keep line_number in sync. */ - if (relevant) - msb.line_number = line_number; + msb.line_number = line_number; if (c == EOF) break; @@ -1394,14 +1400,9 @@ phase5_get (token_ty *tp) else /* state == bufpos && c == '"' */ { /* Finished parsing the string. */ - if (relevant) - { - tp->type = token_type_string_literal; - tp->mixed_string = mixed_string_buffer_result (&msb); - tp->comment = add_reference (savable_comment); - } - else - tp->type = token_type_symbol; + tp->type = token_type_string_literal; + tp->mixed_string = mixed_string_buffer_result (&msb); + tp->comment = add_reference (savable_comment); return; } } @@ -1411,17 +1412,15 @@ phase5_get (token_ty *tp) /* None of the bytes buffer[0]...buffer[state-1] can be ')'. */ - if (relevant) - for (i = 0; i < state; i++) - mixed_string_buffer_append_char (&msb, buffer[i]); + for (i = 0; i < state; i++) + mixed_string_buffer_append_char (&msb, buffer[i]); /* But c may be ')'. */ if (c == ')') state = 1; else { - if (relevant) - mixed_string_buffer_append_char (&msb, c); + mixed_string_buffer_append_char (&msb, c); state = 0; } } diff --git a/gettext-tools/tests/xgettext-c-5 b/gettext-tools/tests/xgettext-c-5 index 01e9a1908..e03b08f0a 100755 --- a/gettext-tools/tests/xgettext-c-5 +++ b/gettext-tools/tests/xgettext-c-5 @@ -7,9 +7,16 @@ cat <<\EOF > xg-c-5.c gettext("Choose a " u8"rosé wine à la carte"); +/* Some people may define a gettext-like function that takes a 'const wchar_t *' argument. */ +wgettext(L"Pulp " L"Fiction"); +/* Or a gettext-like function that takes a 'const char16_t *' argument. */ +c16gettext (u"Rain " u"Man"); +/* Or a gettext-like function that takes a 'const char32_t *' argument. */ +c32gettext (U"Jurassic " U"Park"); EOF ${XGETTEXT} --from-code=ISO-8859-1 --no-location \ + -kgettext -kwgettext -kc16gettext -kc32gettext \ -o xg-c-5.tmp xg-c-5.c || Exit 1 func_filter_POT_Creation_Date xg-c-5.tmp xg-c-5.po @@ -34,6 +41,15 @@ msgstr "" msgid "Choose a rosé wine à la carte" msgstr "" + +msgid "Pulp Fiction" +msgstr "" + +msgid "Rain Man" +msgstr "" + +msgid "Jurassic Park" +msgstr "" EOF : ${DIFF=diff} diff --git a/gettext-tools/tests/xgettext-c-c++-1 b/gettext-tools/tests/xgettext-c-c++-1 index 9d0180b14..09276c556 100755 --- a/gettext-tools/tests/xgettext-c-c++-1 +++ b/gettext-tools/tests/xgettext-c-c++-1 @@ -45,6 +45,12 @@ gettext (uX"This (is NOT a raw string"); gettext (u8"This is a UTF-8 string"); +wgettext (L"This is a wide string"); + +c16gettext (u"This is a 16-bit wide char string"); + +c32gettext (U"This is a 32-bit wide char string"); + u88"This is not a UTF-8 string"; u"This is a UTF-16 string"; @@ -79,6 +85,7 @@ gettext(\"abc\"); EOF ${XGETTEXT} --add-comments --no-location --no-wrap \ + -kgettext -kwgettext -kc16gettext -kc32gettext \ -o xg-c-c++-1.tmp xg-c-c++-1.cc || Exit 1 func_filter_POT_Creation_Date xg-c-c++-1.tmp xg-c-c++-1.po @@ -110,6 +117,15 @@ msgstr "" msgid "This is a UTF-8 string" msgstr "" +msgid "This is a wide string" +msgstr "" + +msgid "This is a 16-bit wide char string" +msgstr "" + +msgid "This is a 32-bit wide char string" +msgstr "" + msgid "" "\n" "This is a raw string\n"