From: Bruno Haible Date: Tue, 11 Feb 2025 10:37:40 +0000 (+0100) Subject: xgettext: Rust: Support named format string arguments. X-Git-Tag: v0.24~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3cbdb13073fbc9eda5753a66ad4c03822722bf5f;p=thirdparty%2Fgettext.git xgettext: Rust: Support named format string arguments. Reported by Kévin Commaille in . * autogen.sh (GNULIB_MODULES_TOOLS_FOR_SRC): Add unictype/property-xid-start, unictype/property-xid-continue. (GNULIB_MODULES_LIBGETTEXTPO): Likewise. * gettext-tools/src/format-rust.c (struct named_arg): New type. (struct spec): Add fields named_arg_count, named. (named_arg_compare): New function. (format_parse): Recognize named arguments. (format_check): Search for differences also between the named arguments. (format_print): Print also the named arguments. * gettext-tools/tests/format-rust-1: Add test cases for named arguments. * gettext-tools/tests/format-rust-2: Likewise. * gettext-tools/doc/gettext.texi (rust-format): Describe rust-format in detail. --- diff --git a/autogen.sh b/autogen.sh index 421f87fd8..cd51773ca 100755 --- a/autogen.sh +++ b/autogen.sh @@ -259,6 +259,8 @@ if ! $skip_gnulib; then unictype/ctype-space unictype/property-white-space unictype/syntax-java-whitespace + unictype/property-xid-start + unictype/property-xid-continue unilbrk/ulc-width-linebreaks uniname/uniname uninorm/nfc @@ -401,6 +403,8 @@ if ! $skip_gnulib; then strnlen unictype/ctype-space unictype/property-white-space + unictype/property-xid-start + unictype/property-xid-continue unilbrk/ulc-width-linebreaks unistr/u8-mbtouc unistr/u8-mbtoucr diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 446d7b11b..0fa7087ff 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -10105,6 +10105,44 @@ These are those supported by the @code{format!} built-in with the restrictions listed in @url{https://crates.io/crates/formatx}, section "Limitations". 
+A directive in a Rust format string consists of +@itemize @bullet +@item +an opening brace @samp{@{}, +@item +an optional non-empty sequence of digits or an optional identifier, +@item +optionally, a @samp{:} and a format specifier, +where a format specifier is of the form +@code{[[@var{fill}]@var{align}][@var{sign}][#][0][@var{minimumwidth}][.@var{precision}][@var{type}]} +where +@itemize - +@item +the @var{fill} character is any character, +@item +the @var{align} flag is one of @samp{<}, @samp{>}, @samp{^}, +@item +the @var{sign} is one of @samp{+}, @samp{-}, +@item +the # flag is @samp{#}, +@item +the 0 flag is @samp{0}, +@item +@var{minimumwidth} is a non-empty sequence of digits, +@item +@var{precision} is a non-empty sequence of digits, +@item +@var{type} is @samp{?}, +@end itemize +@item +optional white-space, +@item +a closing brace @samp{@}}. +@end itemize +@noindent +Brace characters @samp{@{} and @samp{@}} can be escaped by doubling them: +@samp{@{@{} and @samp{@}@}}. + @node ruby-format @subsection Ruby Format Strings diff --git a/gettext-tools/src/format-rust.c b/gettext-tools/src/format-rust.c index d478e8347..e3dbabaf9 100644 --- a/gettext-tools/src/format-rust.c +++ b/gettext-tools/src/format-rust.c @@ -46,7 +46,11 @@ A format string directive here consists of - an opening brace '{', - - an optional non-empty sequence of digits, + - an optional non-empty sequence of digits + or an optional identifier_or_keyword according to + + (that is, a XID_Start character and a sequence of XID_Continue characters + or an underscore '_' and a non-empty sequence of XID_Continue characters), - optionally, a ':' and a format specifier, where a format specifier is of the form [[fill]align][sign][#][0][minimumwidth][.precision][type] where @@ -66,7 +70,23 @@ Numbered ('{m}') and unnumbered ('{}') argument specifications cannot be used in the same string; that's unsupported (although it does not always lead to - an error at runtime, see ). 
+ + Named ('{name}') and unnamed ('{m}', '{}') argument specifications can be + used in the same string. In the formatx! arguments, all unnamed arguments + must come before all named arguments; but this is not a restriction for the + format string. + + In the 'format!' built-in, all arguments that are passed must be used by the + format string, but this is not a requirement for formatx!: + formatx!("{1} {1}", 44, 55) + formatx!("{}", 9, a = 47) + are valid (see ). */ + +struct named_arg +{ + char *name; +}; struct numbered_arg { @@ -77,7 +97,9 @@ struct numbered_arg struct spec { unsigned int directives; + unsigned int named_arg_count; unsigned int numbered_arg_count; + struct named_arg *named; struct numbered_arg *numbered; }; @@ -88,6 +110,13 @@ struct spec the token, and true is returned. Otherwise, FORMATP will be unchanged and false is returned. */ +static int +named_arg_compare (const void *p1, const void *p2) +{ + return strcmp (((const struct named_arg *) p1)->name, + ((const struct named_arg *) p2)->name); +} + static int numbered_arg_compare (const void *p1, const void *p2) { @@ -102,14 +131,18 @@ format_parse (const char *format, bool translated, char *fdi, char **invalid_reason) { struct spec spec; + unsigned int named_allocated; unsigned int numbered_allocated; bool seen_numbered_args; unsigned int unnumbered_arg_count; struct spec *result; spec.directives = 0; + spec.named_arg_count = 0; spec.numbered_arg_count = 0; + spec.named = NULL; spec.numbered = NULL; + named_allocated = 0; numbered_allocated = 0; seen_numbered_args = false; unnumbered_arg_count = 0; @@ -129,6 +162,7 @@ format_parse (const char *format, bool translated, char *fdi, else { const char *const format_start = format; + bool seen_identifier_or_keyword = false; unsigned int arg_id; if (c_isdigit (*format)) @@ -161,16 +195,67 @@ format_parse (const char *format, bool translated, char *fdi, } else { - /* Numbered and unnumbered specifications are exclusive. 
*/ - if (seen_numbered_args > 0) + /* Try to parse an identifier_or_keyword (that is, + - a XID_Start character and a sequence of XID_Continue + characters + - or an underscore '_' and a non-empty sequence of + XID_Continue characters). */ + { + ucs4_t uc1; + int n1 = u8_mbtouc (&uc1, + (const uint8_t *) format, + strnlen (format, 4)); + if (n1 > 0 + && (uc_is_property_xid_start (uc1) || uc1 == '_')) + { + const char *name_start = format; + const char *f = format + n1; + + for (;;) + { + ucs4_t uc; + int n = u8_mbtouc (&uc, + (const uint8_t *) f, + strnlen (f, 4)); + if (n > 0 && uc_is_property_xid_continue (uc)) + f += n; + else + break; + } + if (uc1 != '_' || f > format + 1) + { + const char *name_end = f; + size_t n = name_end - name_start; + char *name = XNMALLOC (n + 1, char); + memcpy (name, name_start, n); + name[n] = '\0'; + + if (named_allocated == spec.named_arg_count) + { + named_allocated = 2 * named_allocated + 1; + spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg)); + } + spec.named[spec.named_arg_count].name = name; + spec.named_arg_count++; + + format = f; + seen_identifier_or_keyword = true; + } + } + } + if (!seen_identifier_or_keyword) { - *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); - FDI_SET (format - 1, FMTDIR_ERROR); - goto bad_format; - } + /* Numbered and unnumbered specifications are exclusive. 
*/ + if (seen_numbered_args > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } - arg_id = unnumbered_arg_count; - unnumbered_arg_count++; + arg_id = unnumbered_arg_count; + unnumbered_arg_count++; + } } c = *format; @@ -253,13 +338,16 @@ format_parse (const char *format, bool translated, char *fdi, spec.directives++; - if (numbered_allocated == spec.numbered_arg_count) + if (!seen_identifier_or_keyword) { - numbered_allocated = 2 * numbered_allocated + 1; - spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg)); + if (numbered_allocated == spec.numbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = arg_id; + spec.numbered_arg_count++; } - spec.numbered[spec.numbered_arg_count].number = arg_id; - spec.numbered_arg_count++; FDI_SET (format, FMTDIR_END); } @@ -288,11 +376,39 @@ format_parse (const char *format, bool translated, char *fdi, spec.numbered_arg_count = j; } + /* Sort the named argument array, and eliminate duplicates. */ + if (spec.named_arg_count > 1) + { + unsigned int i, j; + + qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), + named_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. 
*/ + for (i = j = 0; i < spec.named_arg_count; i++) + if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) + free (spec.named[i].name); + else + { + if (j < i) + spec.named[j].name = spec.named[i].name; + j++; + } + spec.named_arg_count = j; + } + result = XMALLOC (struct spec); *result = spec; return result; bad_format: + if (spec.named != NULL) + { + unsigned int i; + for (i = 0; i < spec.named_arg_count; i++) + free (spec.named[i].name); + free (spec.named); + } if (spec.numbered != NULL) free (spec.numbered); return NULL; @@ -303,6 +419,13 @@ format_free (void *descr) { struct spec *spec = (struct spec *) descr; + if (spec->named != NULL) + { + unsigned int i; + for (i = 0; i < spec->named_arg_count; i++) + free (spec->named[i].name); + free (spec->named); + } free (spec->numbered); free (spec); } @@ -324,6 +447,49 @@ format_check (void *msgid_descr, void *msgstr_descr, bool equality, struct spec *spec2 = (struct spec *) msgstr_descr; bool err = false; + if (spec1->named_arg_count + spec2->named_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->named_arg_count; + unsigned int n2 = spec2->named_arg_count; + + /* Check the argument names in spec2 are contained in those of spec1. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? 
-1 : + strcmp (spec1->named[i].name, spec2->named[j].name)); + + if (cmp > 0) + { + if (error_logger) + error_logger (error_logger_data, + _("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"), + spec2->named[j].name, pretty_msgstr, + pretty_msgid); + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (error_logger) + error_logger (error_logger_data, + _("a format specification for argument '%s' doesn't exist in '%s'"), + spec1->named[i].name, pretty_msgstr); + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + } + if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) { unsigned int i, j; @@ -409,7 +575,7 @@ format_print (void *descr) { struct spec *spec = (struct spec *) descr; unsigned int last; - unsigned int i; + unsigned int i, j; if (spec == NULL) { @@ -432,6 +598,12 @@ format_print (void *descr) printf ("*"); last = number + 1; } + for (j = 0; j < spec->named_arg_count; j++) + { + if (i > 0 || j > 0) + printf (" "); + printf ("%s", spec->named[j].name); + } printf (")"); } diff --git a/gettext-tools/tests/format-rust-1 b/gettext-tools/tests/format-rust-1 index 9e1d2ed06..34e1ed6f0 100755 --- a/gettext-tools/tests/format-rust-1 +++ b/gettext-tools/tests/format-rust-1 @@ -10,22 +10,46 @@ cat <<\EOF > f-rs-1.data "abc{{}}" # Valid: a numeric argument "abc{0}" -# Invalid: a named argument +# Valid: a named argument "abc{value}" # Valid: an omitted number "abc{}" # Invalid: unterminated directive "abc{1" +# Invalid: unterminated directive +"abc{v" +# Valid: a numeric argument and a named argument +"abc{0}def{value}" +# Valid: a numeric argument without number and a named argument +"abc{}def{value}" +# Valid: a named argument and a numeric argument +"abc{value}def{0}" +# Valid: a named argument and a numeric argument without number +"abc{value}def{}" +# Invalid: numeric arguments with and without number are unsupported +"abc{}def{1}" +# Invalid: numeric arguments with and without number 
are unsupported +"abc{1}def{}" # Valid: format specifier "abc{1:0}" # Valid: format specifier +"abc{value:0}" +# Valid: format specifier "abc{1:<<-#012.34}" +# Valid: format specifier +"abc{value:<<-#012.34}" # Invalid: conversion in format specifier "abc{1:<<-#012.34e}" +# Invalid: conversion in format specifier +"abc{value:<<-#012.34e}" # Invalid: empty precision "abc{1:8.}" +# Invalid: empty precision +"abc{value:8.}" # Invalid: invalid format specifier "abc{1:}" +# Invalid: invalid format specifier +"abc{value:}" EOF : ${XGETTEXT=xgettext} diff --git a/gettext-tools/tests/format-rust-2 b/gettext-tools/tests/format-rust-2 index bbeb74e51..55b54ac32 100755 --- a/gettext-tools/tests/format-rust-2 +++ b/gettext-tools/tests/format-rust-2 @@ -10,24 +10,42 @@ msgstr "xyz{1}{0}{2}" # Valid: permutation msgid "abc{2}{0}{1}def" msgstr "xyz{1}{0}{2}" +# Valid: permutation +msgid "abc{w}{u}{v}def" +msgstr "xyz{v}{u}{w}" +# Valid: permutation +msgid "abc{1}{u}{0}def" +msgstr "xyz{0}{u}{1}" # Invalid: missing argument msgid "abc{1}def{0}" msgstr "xyz{0}" # Invalid: missing argument msgid "abc{0}def{1}" msgstr "xyz{1}" +# Invalid: missing argument +msgid "abc{y}def{x}" +msgstr "xyz{x}" # Invalid: added argument msgid "abc{}def" msgstr "xyz{}uvw{}" # Invalid: added argument msgid "abc{0}def" msgstr "xyz{0}uvw{1}" +# Invalid: added argument +msgid "abc{x}def" +msgstr "xyz{x}uvw{y}" # Valid: multiple reuse of same argument msgid "{2} {0} {1}" msgstr "{1} {0} {2} {0}" +# Valid: multiple reuse of same argument +msgid "{w} {u} {v}" +msgstr "{v} {u} {w} {u}" # Valid: single reuse of same argument msgid "{1} {0} {2} {0}" msgstr "{2} {0} {1}" +# Valid: single reuse of same argument +msgid "{v} {u} {w} {u}" +msgstr "{w} {u} {v}" # Valid: "{{" is an escape of "{" msgid "abc{{{1}{2}" msgstr "{2}abc{1}"