From 63f9a5c7abf36e1fd4e30a45a6d3f56e6890611d Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 28 May 2023 12:14:07 +0200 Subject: [PATCH] xgettext: Support digit separators as in ISO C 23. * gettext-tools/src/x-c.c (phase5_get): Recognize digit separators. * gettext-tools/tests/xgettext-c-2: Add more test cases of integer and floating-point literals. * NEWS: Mention it. --- NEWS | 11 ++++++---- gettext-tools/src/x-c.c | 36 +++++++++++++++++++++++++++++++- gettext-tools/tests/xgettext-c-2 | 25 ++++++++++++++++++++++ 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index be252ca1d..0332e52f1 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Version 0.21.2 - February 2023 +Version 0.21.2 - May 2023 * PO file format: - When a #: line contains references to file names that contain spaces, @@ -12,9 +12,12 @@ Version 0.21.2 - February 2023 option. * Programming languages support: - - C, C++: xgettext now supports gettext-like functions that take wide strings - (of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') as - arguments. + - C, C++: + o xgettext now supports gettext-like functions that take wide strings + (of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') + as arguments. + o xgettext now recognizes numbers with digit separators, as defined by + ISO C 23, as tokens. - Tcl: xgettext now supports the \x, \u, and \U escapes as defined in Tcl 8.6. diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c index a5a1ea504..206b0e65c 100644 --- a/gettext-tools/src/x-c.c +++ b/gettext-tools/src/x-c.c @@ -1042,7 +1042,7 @@ phase7_getc () case 'b': return '\b'; - /* The \e escape is preculiar to gcc, and assumes an ASCII + /* The \e escape is peculiar to gcc, and assumes an ASCII character set (or superset). We don't provide support for it here. 
*/ @@ -1613,6 +1613,40 @@ phase5_get (token_ty *tp) break; } } + else + { + /* In C23, a single-quote between two hexadecimal digits + can be part of a number token. It's called a "digit + separator". See ISO C 23 § 6.4.4.1 and § 6.4.4.2. */ + if (bufpos > 0) + { + char prev = buffer[bufpos - 1]; + if ((prev >= '0' && prev <= '9') + || (prev >= 'A' && prev <= 'F') + || (prev >= 'a' && prev <= 'f')) + { + int c1 = phase4_getc (); + if ((c1 >= '0' && c1 <= '9') + || (c1 >= 'A' && c1 <= 'F') + || (c1 >= 'a' && c1 <= 'f')) + { + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + c = c1; + continue; + } + /* The two phase4_getc() calls that returned c and c1 + did nothing more than call phase3_getc(), + without any lookahead. Therefore 2 pushback + characters are supported in this case. */ + phase4_ungetc (c1); + } + } + } FALLTHROUGH; default: phase4_ungetc (c); diff --git a/gettext-tools/tests/xgettext-c-2 b/gettext-tools/tests/xgettext-c-2 index c90786d2e..21d742e54 100755 --- a/gettext-tools/tests/xgettext-c-2 +++ b/gettext-tools/tests/xgettext-c-2 @@ -21,6 +21,16 @@ static char *s = ""; _("after string") static double d = 10e-1; _("after double") +static int x = 0x2a; +_("after hex integer") +static int b = 0b101011; +_("after binary integer") +static int i_c23 = 1'000; +_("after integer with digit separator") +static int x_c23 = 0xFE23'FFFF'FFFF'4321; +_("after hex integer with digit separator") +static double d_c23 = 0.333'333; +_("after double with digit separator") EOF : ${XGETTEXT=xgettext} @@ -46,6 +56,21 @@ msgstr "" msgid "after double" msgstr "" + +msgid "after hex integer" +msgstr "" + +msgid "after binary integer" +msgstr "" + +msgid "after integer with digit separator" +msgstr "" + +msgid "after hex integer with digit separator" +msgstr "" + +msgid "after double with digit separator" +msgstr "" EOF : ${DIFF=diff} -- 2.47.2