From 4dfe1c4609a5c600112cd83a4d3af5d4e4d402c7 Mon Sep 17 00:00:00 2001 From: Daiki Ueno Date: Tue, 13 Jan 2015 12:00:35 +0900 Subject: [PATCH] c++: Differentiate scanning logic from C To enable raw string literals only in C++, add a flag indicating the current source language is C++. Suggested by Vaclav Slavik at: . * gettext-tools/src/x-c.h (extract_cxx): New declaration. (SCANNERS_C): Use extract_cxx for "C++". * gettext-tools/src/x-c.c (cxx_extensions): New variable. (phase5_get): Respect cxx_extensions and recognize raw string literals only when it is set. (extract_cxx): New function. (extract_c, extract_objc): Reset cxx_extensions. * gettext-tools/tests/xgettext-c-20: Adjust the source file name from *.c to *.cc to enable handling of raw string literals. --- gettext-tools/src/ChangeLog | 15 ++++++ gettext-tools/src/x-c.c | 78 ++++++++++++++++++++++++------- gettext-tools/src/x-c.h | 9 +++- gettext-tools/tests/ChangeLog | 5 ++ gettext-tools/tests/xgettext-c-20 | 8 ++-- 5 files changed, 92 insertions(+), 23 deletions(-) diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index fbfa43ffe..bf50cb930 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,18 @@ +2015-01-13 Daiki Ueno + + c++: Differentiate scanning logic from C + To enable raw string literals only in C++, add a flag indicating + the current source language is C++. + Suggested by Vaclav Slavik at: + . + * x-c.h (extract_cxx): New declaration. + (SCANNERS_C): Use extract_cxx for "C++". + * x-c.c (cxx_extensions): New variable. + (phase5_get): Respect cxx_extensions and recognize raw string + literals only when it is set. + (extract_cxx): New function. + (extract_c, extract_objc): Reset cxx_extensions. + 2014-12-24 Daiki Ueno * gettext 0.19.4 released. diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c index 001cfd91d..32b92d2ae 100644 --- a/gettext-tools/src/x-c.c +++ b/gettext-tools/src/x-c.c @@ -829,6 +829,9 @@ phase4_ungetc (int c) /* True if ObjectiveC extensions are recognized. */ static bool objc_extensions; +/* True if C++ extensions are recognized. */ +static bool cxx_extensions; + enum token_type_ty { token_type_character_constant, /* 'x' */ @@ -1097,7 +1100,7 @@ phase5_get (token_ty *tp) int bufpos; int c; int last_was_backslash; - bool raw_expected = false; + bool raw_expected; if (phase5_pushback_length) { @@ -1177,10 +1180,14 @@ phase5_get (token_ty *tp) continue; default: - /* Recognize C++ string literals prefixed by R, u8, u8R, - u, uR, U, UR, L, or LR. It is defined in ISO/IEC - 9899:2011 2.14.5. Since gettext's argument is a byte - sequence, we are only interested in u8, R, and u8R. */ + /* Recognize string literals prefixed by R, u8, u8R, u, + uR, U, UR, L, or LR. It is defined in the C standard + ISO/IEC 9899:201x and the C++ standard ISO/IEC + 14882:2011. The raw string literals prefixed by R, + u8R, uR, UR, or LR are only valid in C++. + + Since gettext's argument is a byte sequence, we are + only interested in u8, R, and u8R. */ if (c == '"') { bool is_prefix = false; @@ -1188,37 +1195,61 @@ phase5_get (token_ty *tp) switch (buffer[0]) { case 'R': - if (bufpos == 1) - is_prefix = true; + if (cxx_extensions && bufpos == 1) + { + is_prefix = true; + raw_expected = true; + } break; case 'u': if (bufpos == 1) - is_prefix = true; + { + is_prefix = true; + raw_expected = false; + } else switch (buffer[1]) { case 'R': - if (bufpos == 2) - is_prefix = true; + if (cxx_extensions && bufpos == 2) + { + is_prefix = true; + raw_expected = true; + } break; case '8': - if (bufpos == 2 - || (bufpos == 3 && buffer[2] == 'R')) - is_prefix = true; + if (bufpos == 2) + { + is_prefix = true; + raw_expected = false; + } + else if (cxx_extensions + && bufpos == 3 && buffer[2] == 'R') + { + is_prefix = true; + raw_expected = true; + } break; } break; case 'U': case 'L': - if (bufpos == 1 - || (bufpos == 2 && buffer[1] == 'R')) - is_prefix = true; + if (bufpos == 1) + { + is_prefix = true; + raw_expected = false; + } + else if (cxx_extensions + && bufpos == 2 && buffer[1] == 'R') + { + is_prefix = true; + raw_expected = true; + } break; } if (is_prefix) { - raw_expected = buffer[bufpos - 1] == 'R'; bufpos = 0; goto string; } @@ -2160,6 +2191,18 @@ extract_c (FILE *f, msgdomain_list_ty *mdlp) { objc_extensions = false; + cxx_extensions = false; + extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp); +} + +void +extract_cxx (FILE *f, + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ + objc_extensions = false; + cxx_extensions = true; extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp); } @@ -2170,5 +2213,6 @@ extract_objc (FILE *f, msgdomain_list_ty *mdlp) { objc_extensions = true; + cxx_extensions = false; extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp); } diff --git a/gettext-tools/src/x-c.h b/gettext-tools/src/x-c.h index 64e4953ff..9e819a029 100644 --- a/gettext-tools/src/x-c.h +++ b/gettext-tools/src/x-c.h @@ -45,7 +45,7 @@ extern "C" { &flag_table_c, \ &formatstring_c, NULL, \ &literalstring_c }, \ - { "C++", extract_c, \ + { "C++", extract_cxx, \ &flag_table_c, \ &formatstring_c, NULL, \ &literalstring_c }, \ @@ -58,11 +58,16 @@ extern "C" { &formatstring_gcc_internal, &formatstring_gfc_internal, \ &literalstring_c }, \ -/* Scan a C/C++ file and add its translatable strings to mdlp. */ +/* Scan a C file and add its translatable strings to mdlp. */ extern void extract_c (FILE *fp, const char *real_filename, const char *logical_filename, flag_context_list_table_ty *flag_table, msgdomain_list_ty *mdlp); +/* Scan a C++ file and add its translatable strings to mdlp. */ +extern void extract_cxx (FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp); /* Scan an ObjectiveC file and add its translatable strings to mdlp. */ extern void extract_objc (FILE *fp, const char *real_filename, const char *logical_filename, diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 47c51ff63..8ab049aa5 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2015-01-13 Daiki Ueno + + * xgettext-c-20: Adjust the source file name from *.c to *.cc to + enable handling of raw string literals. + 2014-12-24 Daiki Ueno * gettext 0.19.4 released. diff --git a/gettext-tools/tests/xgettext-c-20 b/gettext-tools/tests/xgettext-c-20 index 73e4442a0..274096262 100755 --- a/gettext-tools/tests/xgettext-c-20 +++ b/gettext-tools/tests/xgettext-c-20 @@ -5,21 +5,21 @@ : ${XGETTEXT=xgettext} -cat <<\EOF > err0.c +cat <<\EOF > err0.cc gettext ("aaa( bbb )ccc"); EOF -(LANGUAGE= LC_ALL=C ${XGETTEXT} --no-location --no-wrap -o - err0.c 2>&1; exit) | grep 'unterminated string literal' || exit 1 +(LANGUAGE= LC_ALL=C ${XGETTEXT} --no-location --no-wrap -o - err0.cc 2>&1; exit) | grep 'unterminated string literal' || exit 1 -cat <<\EOF > err1.c +cat <<\EOF > err1.cc gettext (R"aaa( bbb )ccc"); EOF -(LANGUAGE= LC_ALL=C ${XGETTEXT} --no-location --no-wrap -o - err1.c 2>&1; exit) | grep 'unterminated string literal' || exit 1 +(LANGUAGE= LC_ALL=C ${XGETTEXT} --no-location --no-wrap -o - err1.cc 2>&1; exit) | grep 'unterminated string literal' || exit 1 cat <<\EOF > xg-c-20.cc #define X "" -- 2.47.2