From: Bruno Haible Date: Sat, 27 Jul 2024 14:32:16 +0000 (+0200) Subject: xgettext: Java: Improve the support of the method .formatted(). X-Git-Tag: v0.23~225 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1ff84af4b1b20413cc42244fcc9a9e941c773c25;p=thirdparty%2Fgettext.git xgettext: Java: Improve the support of the method .formatted(). * gettext-tools/src/x-java.c (enum token_type_ty): New enum items token_type_conditional, token_type_colon, token_type_assign, token_type_operator. (phase5_get): Recognize all kinds of operators that may occur in expressions. (extract_parenthesized): Invoke new_sub_region instead of inheriting_region. Handle 'return' keyword specially. Invoke set_format_flag_on_region instead of iterating through the region. When encountering an operator, close the inner_region and open a new inner_region. * gettext-tools/tests/xgettext-java-8: Add many more test cases. --- diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c index 4cf5e8047..cad9f2cdb 100644 --- a/gettext-tools/src/x-java.c +++ b/gettext-tools/src/x-java.c @@ -56,7 +56,12 @@ /* The Java syntax is defined in the Java Language Specification (available from https://docs.oracle.com/javase/specs/), - chapter 3 "Lexical Structure". */ + chapter 3 "Lexical Structure". + + It supports string formatting through functions and methods, namely + through the String.formatted method added in Java 15: + https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html#formatted(java.lang.Object...) + */ /* ====================== Keyword set customization. ====================== */ @@ -574,8 +579,21 @@ enum token_type_ty token_type_number, /* 1.23 */ token_type_symbol, /* identifier, keyword, null */ token_type_plus, /* + */ + token_type_conditional, /* ? */ + token_type_colon, /* : */ + token_type_assign, /* = */ + token_type_operator, /* other operator: + - ++ -- ~ ! 
+ * / % + << >> >>> + < > <= >= == != + & ^ | && || + *= /= %= += -= <<= >>= >>>= &= ^= |= + -> + (switch expressions are not recognized yet.) + */ token_type_semicolon, /* ; */ - token_type_other /* character literal, misc. operator */ + token_type_other /* character literal, unknown operator */ }; typedef enum token_type_ty token_type_ty; @@ -1298,28 +1316,238 @@ phase5_get (token_ty *tp) } case '+': - c = phase4_getc (); + c = phase3_getc (); if (RED (c) == '+') /* Operator ++ */ - tp->type = token_type_other; + tp->type = token_type_operator; else if (RED (c) == '=') /* Operator += */ - tp->type = token_type_other; + tp->type = token_type_operator; else { /* Operator + */ - phase4_ungetc (c); + phase3_ungetc (c); tp->type = token_type_plus; } return; + case '-': + c = phase3_getc (); + if (RED (c) == '-') + /* Operator -- */ + tp->type = token_type_operator; + else if (RED (c) == '>') + /* Operator -> */ + tp->type = token_type_operator; + else if (RED (c) == '=') + /* Operator -= */ + tp->type = token_type_operator; + else + { + /* Operator - */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '~': + /* Operator ~ */ + tp->type = token_type_operator; + return; + + case '!': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator != */ + tp->type = token_type_operator; + else + { + /* Operator ! 
*/ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '*': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator *= */ + tp->type = token_type_operator; + else + { + /* Operator * */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '/': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator /= */ + tp->type = token_type_operator; + else + { + /* Operator / */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '%': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator %= */ + tp->type = token_type_operator; + else + { + /* Operator % */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '<': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator <= */ + tp->type = token_type_operator; + else if (RED (c) == '<') + { + int c2 = phase3_getc (); + if (RED (c2) == '=') + /* Operator <<= */ + tp->type = token_type_operator; + else + { + /* Operator << */ + phase3_ungetc (c2); + tp->type = token_type_operator; + } + } + else + { + /* Operator < */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '>': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator >= */ + tp->type = token_type_operator; + else if (RED (c) == '>') + { + int c2 = phase3_getc (); + if (RED (c2) == '=') + /* Operator >>= */ + tp->type = token_type_operator; + else if (RED (c2) == '>') + { + int c3 = phase3_getc (); + if (RED (c3) == '=') + /* Operator >>>= */ + tp->type = token_type_operator; + else + { + /* Operator >>> */ + phase3_ungetc (c3); + tp->type = token_type_operator; + } + } + else + { + /* Operator >> */ + phase3_ungetc (c2); + tp->type = token_type_operator; + } + } + else + { + /* Operator > */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '&': + c = phase3_getc (); + if (RED (c) == '&') + /* Operator && */ + tp->type = token_type_operator; + else if (RED (c) == '=') + /* Operator &= */ + tp->type = 
token_type_operator; + else + { + /* Operator & */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '^': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator ^= */ + tp->type = token_type_operator; + else + { + /* Operator ^ */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '|': + c = phase3_getc (); + if (RED (c) == '|') + /* Operator || */ + tp->type = token_type_operator; + else if (RED (c) == '=') + /* Operator |= */ + tp->type = token_type_operator; + else + { + /* Operator | */ + phase3_ungetc (c); + tp->type = token_type_operator; + } + return; + + case '=': + c = phase3_getc (); + if (RED (c) == '=') + /* Operator == */ + tp->type = token_type_operator; + else + { + /* Assignment operator = */ + phase3_ungetc (c); + tp->type = token_type_assign; + } + return; + + case '?': + /* Operator ?, used in ternary conditionals. */ + tp->type = token_type_conditional; + return; + + case ':': + /* Operator :, used in ternary conditionals. */ + tp->type = token_type_colon; + return; + + case ';': /* Semicolon. */ tp->type = token_type_semicolon; return; default: - /* Misc. operator. */ + /* Unknown operator. */ tp->type = token_type_other; return; } @@ -1477,10 +1705,11 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, /* Context iterator that will be used if the next token is a '('. */ flag_context_list_iterator_ty next_context_iter = passthrough_context_list_iterator; + /* Current context. */ + flag_context_ty curr_context = + flag_context_list_iterator_advance (&context_iter); /* Current region. */ - flag_region_ty *inner_region = - inheriting_region (outer_region, - flag_context_list_iterator_advance (&context_iter)); + flag_region_ty *inner_region = new_sub_region (outer_region, curr_context); /* Start state is 0. 
*/ state = 0; @@ -1535,43 +1764,53 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, break; } - for (dottedname = sum;;) + /* 'return' is a keyword, not a function-like symbol. + It needs to be treated specially, because in + return (EXPR).formatted() + the extracted strings in EXPR need to be marked as + java-printf-format, whereas in + foobar (EXPR).formatted() + they should not. */ + if (strcmp (sum, "return") != 0) { - void *keyword_value; - - if (hash_find_entry (&keywords, dottedname, strlen (dottedname), - &keyword_value) - == 0) + for (dottedname = sum;;) { - next_shapes = (const struct callshapes *) keyword_value; - state = 1; - break; + void *keyword_value; + + if (hash_find_entry (&keywords, dottedname, strlen (dottedname), + &keyword_value) + == 0) + { + next_shapes = (const struct callshapes *) keyword_value; + state = 1; + break; + } + + dottedname = strchr (dottedname, '.'); + if (dottedname == NULL) + { + state = 0; + break; + } + dottedname++; } - dottedname = strchr (dottedname, '.'); - if (dottedname == NULL) + for (dottedname = sum;;) { - state = 0; - break; + context_list = + flag_context_list_table_lookup ( + flag_context_list_table, + dottedname, strlen (dottedname)); + if (context_list != NULL) + break; + + dottedname = strchr (dottedname, '.'); + if (dottedname == NULL) + break; + dottedname++; } - dottedname++; - } - - for (dottedname = sum;;) - { - context_list = - flag_context_list_table_lookup ( - flag_context_list_table, - dottedname, strlen (dottedname)); - if (context_list != NULL) - break; - - dottedname = strchr (dottedname, '.'); - if (dottedname == NULL) - break; - dottedname++; + next_context_iter = flag_context_list_iterator (context_list); } - next_context_iter = flag_context_list_iterator (context_list); free (sum); continue; @@ -1605,16 +1844,8 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, { /* Mark the messages found in the region as java-printf-format a posteriori. 
*/ - inner_region->for_formatstring[XFORMAT_SECONDARY].is_format = yes_according_to_context; - struct remembered_message_list_ty *rmlp = - inner_region->for_formatstring[XFORMAT_SECONDARY].remembered; - size_t i; - for (i = 0; i < rmlp->nitems; i++) - { - struct remembered_message_ty *rmp = &rmlp->item[i]; - set_format_flag_from_context (rmp->mp, rmp->plural, &rmp->pos, - XFORMAT_SECONDARY, inner_region); - } + set_format_flag_on_region (inner_region, + XFORMAT_SECONDARY, yes_according_to_context); } x_java_unlex (&token3); } @@ -1676,10 +1907,56 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, case token_type_comma: arg++; unref_region (inner_region); - inner_region = - inheriting_region (outer_region, - flag_context_list_iterator_advance ( - &context_iter)); + curr_context = flag_context_list_iterator_advance (&context_iter); + inner_region = new_sub_region (outer_region, curr_context); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_conditional: + /* In an expression A ? B : C, each of A, B, C is a distinct + sub-region, and since the value of A is not the value of entire + expression, if later set_format_flag_on_region is called on this + region or an ancestor region, it shall not have an effect on the + remembered messages of A. */ + inner_region->inherit_from_parent_region = false; + unref_region (inner_region); + inner_region = new_sub_region (outer_region, curr_context); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_colon: + /* In an expression A ? B : C, each of A, B, C is a distinct + sub-region. */ + unref_region (inner_region); + inner_region = new_sub_region (outer_region, curr_context); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_assign: + /* In an expression A = B, A and B are distinct sub-regions. + The value of B is the value of the entire expression. 
*/ + inner_region->inherit_from_parent_region = false; + unref_region (inner_region); + inner_region = new_sub_region (outer_region, curr_context); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_plus: + case token_type_operator: + /* When an expression contains one of these operators, neither the + value on the left of the operator nor the value on the right of the + operator is string-valued and the value of the entire expression. + Therefore, if later set_format_flag_on_region is called on this + region or an ancestor region, it shall not have an effect on the + remembered messages of this region. */ + inner_region->inherit_from_parent_region = false; + unref_region (inner_region); + inner_region = new_sub_region (outer_region, curr_context); + inner_region->inherit_from_parent_region = false; next_context_iter = passthrough_context_list_iterator; state = 0; continue; @@ -1722,7 +1999,6 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, case token_type_dot: case token_type_number: - case token_type_plus: case token_type_other: next_context_iter = null_context_list_iterator; state = 0; diff --git a/gettext-tools/tests/xgettext-java-8 b/gettext-tools/tests/xgettext-java-8 index 15cd7bcb7..863b48245 100644 --- a/gettext-tools/tests/xgettext-java-8 +++ b/gettext-tools/tests/xgettext-java-8 @@ -8,10 +8,56 @@ cat <<\EOF > xg-j-8.java "%s".formatted(_("Hello 1")); _("Explanation: %s").formatted(_("Hello 2")); -_("No error 1"); -_("No error 2").formatted(); -(_("No error 3")).formatted(); -foo(_("No error 4")).formatted(); +_("test case 1"); +_("test case 2").formatted(); +(_("test case 3")); +(_("test case 4")).formatted(); +((_("test case 5"))); +((_("test case 6"))).formatted(); +return _("test case 10"); +return _("test case 11").formatted(); +return (_("test case 12").formatted()); +return (_("test case 13")).formatted(); +foo (_("test case 15")); +foo (_("test case 16")).formatted(); 
+(foo (_("test case 17"))).formatted(); +(foo (_("test case 18")).formatted()); +foo (_("test case 19").formatted()); +foo + _("test case 20"); +foo + _("test case 21").formatted(); +(foo + _("test case 22")).formatted(); +_("test case 23") + foo; +_("test case 24").formatted() + foo; +(_("test case 25") + foo).formatted(); +_("test case 26 a") + _("test case 26 b"); +_("test case 27 a").formatted() + _("test case 27 b"); +_("test case 28 a") + _("test case 28 b").formatted(); +_("test case 29 a").formatted() + _("test case 29 b").formatted(); +(_("test case 30 a") + _("test case 30 b")).formatted(); +return _("test case 31 a") + _("test case 31 b"); +return _("test case 32 a").formatted() + _("test case 32 b"); +return _("test case 33 a") + _("test case 33 b").formatted(); +return _("test case 34 a").formatted() + _("test case 34 b").formatted(); +return (_("test case 35 a") + _("test case 35 b")).formatted(); +return _("test case 36 a") + (_("test case 36 b")).formatted() + _("test case 36 c").formatted(); +return _("test case 37 a") + _("test case 37 b").formatted() + (_("test case 37 c")).formatted(); +return _("test case 38 a") + (_("test case 38 b")).formatted() + (_("test case 38 c")).formatted(); +foo ? _("test case 40 a") : _("test case 40 b"); +foo ? _("test case 41 a").formatted() : _("test case 41 b"); +foo ? _("test case 42 a") : _("test case 42 b").formatted(); +foo ? _("test case 43 a").formatted() : _("test case 43 b").formatted(); +(foo ? _("test case 44 a") : _("test case 44 b")); +(foo ? _("test case 45 a") : _("test case 45 b")).formatted(); +s = (_("test case 46")).formatted(); +(s = _("test case 47")).formatted(); +s += (_("test case 48")).formatted(); +(s += _("test case 49")).formatted(); +return _("test case 50 a") + + _("test case 50 b").formatted() + + (_("test case 50 c")).formatted() + + foo(_("test case 50 d")).formatted() + + (b ? _("test case 50 e") : _("test case 50 f")).formatted() + + (b ? 
foo + _("test case 50 g") : _("test case 50 h") + bar).formatted(); EOF ${XGETTEXT} --omit-header --no-location \ @@ -31,18 +77,262 @@ msgstr "" msgid "Hello 2" msgstr "" -msgid "No error 1" +msgid "test case 1" msgstr "" #, java-printf-format -msgid "No error 2" +msgid "test case 2" +msgstr "" + +msgid "test case 3" +msgstr "" + +#, java-printf-format +msgid "test case 4" +msgstr "" + +msgid "test case 5" +msgstr "" + +#, java-printf-format +msgid "test case 6" +msgstr "" + +msgid "test case 10" +msgstr "" + +#, java-printf-format +msgid "test case 11" +msgstr "" + +#, java-printf-format +msgid "test case 12" +msgstr "" + +#, java-printf-format +msgid "test case 13" +msgstr "" + +msgid "test case 15" +msgstr "" + +msgid "test case 16" +msgstr "" + +msgid "test case 17" +msgstr "" + +msgid "test case 18" +msgstr "" + +#, java-printf-format +msgid "test case 19" +msgstr "" + +msgid "test case 20" +msgstr "" + +#, java-printf-format +msgid "test case 21" +msgstr "" + +msgid "test case 22" +msgstr "" + +msgid "test case 23" +msgstr "" + +#, java-printf-format +msgid "test case 24" +msgstr "" + +msgid "test case 25" +msgstr "" + +msgid "test case 26 a" +msgstr "" + +msgid "test case 26 b" +msgstr "" + +#, java-printf-format +msgid "test case 27 a" +msgstr "" + +msgid "test case 27 b" +msgstr "" + +msgid "test case 28 a" +msgstr "" + +#, java-printf-format +msgid "test case 28 b" +msgstr "" + +#, java-printf-format +msgid "test case 29 a" +msgstr "" + +#, java-printf-format +msgid "test case 29 b" +msgstr "" + +msgid "test case 30 a" +msgstr "" + +msgid "test case 30 b" +msgstr "" + +msgid "test case 31 a" +msgstr "" + +msgid "test case 31 b" +msgstr "" + +#, java-printf-format +msgid "test case 32 a" +msgstr "" + +msgid "test case 32 b" +msgstr "" + +msgid "test case 33 a" +msgstr "" + +#, java-printf-format +msgid "test case 33 b" +msgstr "" + +#, java-printf-format +msgid "test case 34 a" msgstr "" #, java-printf-format -msgid "No error 3" +msgid "test case 34 b" 
+msgstr "" + +msgid "test case 35 a" +msgstr "" + +msgid "test case 35 b" +msgstr "" + +msgid "test case 36 a" +msgstr "" + +#, java-printf-format +msgid "test case 36 b" +msgstr "" + +#, java-printf-format +msgid "test case 36 c" +msgstr "" + +msgid "test case 37 a" +msgstr "" + +#, java-printf-format +msgid "test case 37 b" +msgstr "" + +#, java-printf-format +msgid "test case 37 c" +msgstr "" + +msgid "test case 38 a" +msgstr "" + +#, java-printf-format +msgid "test case 38 b" +msgstr "" + +#, java-printf-format +msgid "test case 38 c" +msgstr "" + +msgid "test case 40 a" +msgstr "" + +msgid "test case 40 b" +msgstr "" + +#, java-printf-format +msgid "test case 41 a" +msgstr "" + +msgid "test case 41 b" +msgstr "" + +msgid "test case 42 a" +msgstr "" + +#, java-printf-format +msgid "test case 42 b" +msgstr "" + +#, java-printf-format +msgid "test case 43 a" +msgstr "" + +#, java-printf-format +msgid "test case 43 b" +msgstr "" + +msgid "test case 44 a" +msgstr "" + +msgid "test case 44 b" +msgstr "" + +#, java-printf-format +msgid "test case 45 a" +msgstr "" + +#, java-printf-format +msgid "test case 45 b" +msgstr "" + +#, java-printf-format +msgid "test case 46" +msgstr "" + +#, java-printf-format +msgid "test case 47" +msgstr "" + +#, java-printf-format +msgid "test case 48" +msgstr "" + +msgid "test case 49" +msgstr "" + +msgid "test case 50 a" +msgstr "" + +#, java-printf-format +msgid "test case 50 b" +msgstr "" + +#, java-printf-format +msgid "test case 50 c" +msgstr "" + +msgid "test case 50 d" +msgstr "" + +#, java-printf-format +msgid "test case 50 e" +msgstr "" + +#, java-printf-format +msgid "test case 50 f" +msgstr "" + +msgid "test case 50 g" msgstr "" -msgid "No error 4" +msgid "test case 50 h" msgstr "" EOF