From: Bruno Haible Date: Mon, 17 Oct 2005 10:22:28 +0000 (+0000) Subject: Improve strictness of plural form checking. X-Git-Tag: v0.15~362 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d09bccebaf5e67085fc23a8126e9fe533f702a30;p=thirdparty%2Fgettext.git Improve strictness of plural form checking. --- diff --git a/NEWS b/NEWS index 23da5d3fe..844096da2 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,20 @@ etc. that take a context argument. For more information, see the node "Contexts" in the manual. +* msgfmt's format string checking is now stricter in the presence of plural + forms. For example, in German, with nplurals=2 and plural=(n != 1), + the translation + + #, c-format + msgid "%d fatal error" + msgid_plural "%d fatal errors" + msgstr[0] "ein fataler Fehler" + msgstr[1] "fatale Fehler" + + was earlier considered valid and now gives an error when "msgfmt --check" + is used: + "number of format specifications in 'msgid' and 'msgstr[1]' does not match" + * Programming languages support: - Python: diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 245cadeb3..78b2e58ab 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,20 @@ +2005-10-05 Bruno Haible + + * format.h (check_msgid_msgstr_format): Add plural_distribution + argument. + * format.c (check_msgid_msgstr_format): Add plural_distribution + argument. Use it for index dependent strictness. + * msgl-check.h (check_message): Add plural_distribution argument. + * msgl-check.c: Include xalloc.h. + (check_plural_eval): Create a plural_distribution array as additional + output parameter. + (check_plural): Return plural_distribution array as additional output + parameter. + (check_pair, check_message): Add plural_distribution argument. + (check_message_list): Pass the plural_distribution from check_plural + to check_message. + * gettext-po.c (po_message_check_format): Update. + 2005-10-04 Bruno Haible Combine all msgfmt checks in a single place. diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c index e232799ce..90dfd6ec6 100644 --- a/gettext-tools/src/format.c +++ b/gettext-tools/src/format.c @@ -58,11 +58,16 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = }; /* Check whether both formats strings contain compatible format - specifications. Return the number of errors that were seen. */ + specifications. + PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements, + PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed + infinitely often by the plural formula. + Return the number of errors that were seen. */ int check_msgid_msgstr_format (const char *msgid, const char *msgid_plural, const char *msgstr, size_t msgstr_len, const enum is_format is_format[NFORMATS], + const unsigned char *plural_distribution, formatstring_error_logger_t error_logger) { int seen_errors = 0; @@ -99,15 +104,7 @@ check_msgid_msgstr_format (const char *msgid, const char *msgid_plural, { char buf[18+1]; const char *pretty_msgstr = "msgstr"; - /* Use strict checking (require same number of format directives - on both sides) if the message has no plurals, or if msgid_plural - exists but on the msgstr[] side there is only msgstr[0]. - Use relaxed checking when there are at least two msgstr[] forms. - We are too lazy to check which of the plural forms applies to - infinitely many values of N. */ bool has_plural_translations = (strlen (msgstr) + 1 < msgstr_len); - bool strict_checking = - (msgid_plural == NULL || !has_plural_translations); const char *p_end = msgstr + msgstr_len; const char *p; @@ -125,6 +122,20 @@ check_msgid_msgstr_format (const char *msgid, const char *msgid_plural, if (msgstr_descr != NULL) { + /* Use strict checking (require same number of format + directives on both sides) if the message has no plurals, + or if msgid_plural exists but on the msgstr[] side + there is only msgstr[0], or if plural_distribution[j] + indicates that the variant applies to infinitely many + values of N. + Use relaxed checking when there are at least two + msgstr[] forms and the plural_distribution array does + not give more precise information. */ + bool strict_checking = + (msgid_plural == NULL + || !has_plural_translations + || (plural_distribution != NULL && plural_distribution[j])); + if (parser->check (msgid_descr, msgstr_descr, strict_checking, error_logger, pretty_msgstr)) diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h index 872dc37ac..8d6edbd69 100644 --- a/gettext-tools/src/format.h +++ b/gettext-tools/src/format.h @@ -107,11 +107,16 @@ extern void struct interval **intervalsp, size_t *lengthp); /* Check whether both formats strings contain compatible format - specifications. Return the number of errors that were seen. */ + specifications. + PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements, + PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed + infinitely often by the plural formula. + Return the number of errors that were seen. */ extern int check_msgid_msgstr_format (const char *msgid, const char *msgid_plural, const char *msgstr, size_t msgstr_len, const enum is_format is_format[NFORMATS], + const unsigned char *plural_distribution, formatstring_error_logger_t error_logger); diff --git a/gettext-tools/src/gettext-po.c b/gettext-tools/src/gettext-po.c index 26e2a9ff6..b17ec5a94 100644 --- a/gettext-tools/src/gettext-po.c +++ b/gettext-tools/src/gettext-po.c @@ -1118,7 +1118,7 @@ po_message_check_format (po_message_t message, po_xerror_handler_t handler) handler->xerror2; if (!mp->obsolete) - check_message (mp, &mp->pos, 0, 1, 0, 0, 0, 0); + check_message (mp, &mp->pos, 0, 1, NULL, 0, 0, 0, 0); /* Restore error handler. */ po_xerror = textmode_xerror; @@ -1156,7 +1156,7 @@ po_message_check_format (po_message_t message, po_error_handler_t handler) check_msgid_msgstr_format (mp->msgid, mp->msgid_plural, mp->msgstr, mp->msgstr_len, - mp->is_format, po_error_logger); + mp->is_format, NULL, po_error_logger); /* Restore error handler. */ po_error = error; diff --git a/gettext-tools/src/msgl-check.c b/gettext-tools/src/msgl-check.c index 0a94c2425..3e12d5680 100644 --- a/gettext-tools/src/msgl-check.c +++ b/gettext-tools/src/msgl-check.c @@ -31,6 +31,7 @@ #include #include +#include "xalloc.h" #include "xerror.h" #include "po-xerror.h" #include "format.h" @@ -116,12 +117,29 @@ uninstall_sigfpe_handler () #endif } -/* Check the values returned by plural_eval. */ +/* Check the values returned by plural_eval. + Return the number of errors that were seen. + If no errors, returns in *PLURAL_DISTRIBUTION either NULL or an array + of length NPLURALS_VALUE describing which plural formula values appear + infinitely often. */ static int check_plural_eval (struct expression *plural_expr, unsigned long nplurals_value, - const message_ty *header) + const message_ty *header, + unsigned char **plural_distribution) { + /* Do as if the plural formula assumes a value N infinitely often if it + assumes it at least 5 times. */ +#define OFTEN 5 + unsigned char *distribution; + + /* Allocate a distribution array. */ + if (nplurals_value <= 100) + distribution = (unsigned char *) xcalloc (nplurals_value, 1); + else + /* nplurals_value is nonsense. Don't risk an out-of-memory. */ + distribution = NULL; + if (sigsetjmp (sigfpe_exit, 1) == 0) { unsigned long n; @@ -155,11 +173,24 @@ check_plural_eval (struct expression *plural_expr, free (msg); return 1; } + + if (distribution != NULL && distribution[val] < OFTEN) + distribution[val]++; } /* End of protection against arithmetic exceptions. */ uninstall_sigfpe_handler (); + /* Normalize the distribution[val] statistics. */ + if (distribution != NULL) + { + unsigned long val; + + for (val = 0; val < nplurals_value; val++) + distribution[val] = (distribution[val] == OFTEN ? 1 : 0); + } + *plural_distribution = distribution; + return 0; } else @@ -191,8 +222,13 @@ check_plural_eval (struct expression *plural_expr, } po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg); + + if (distribution != NULL) + free (distribution); + return 1; } +#undef OFTEN } @@ -228,9 +264,11 @@ plural_help (const char *nullentry) /* Perform plural expression checking. - Return the number of errors that were seen. */ + Return the number of errors that were seen. + If no errors, returns in *PLURAL_DISTRIBUTION either NULL or an array + describing which plural formula values appear infinitely often. */ static int -check_plural (message_list_ty *mlp) +check_plural (message_list_ty *mlp, unsigned char **plural_distribution) { int seen_errors = 0; const message_ty *has_plural; @@ -240,6 +278,7 @@ check_plural (message_list_ty *mlp) const message_ty *max_pos; size_t j; message_ty *header; + unsigned char *distribution = NULL; /* Determine whether mlp has plural entries. */ has_plural = NULL; @@ -398,7 +437,9 @@ check_plural (message_list_ty *mlp) /* See whether nplurals and plural fit together. */ if (!seen_errors) - seen_errors = check_plural_eval (plural_expr, nplurals_value, header); + seen_errors = + check_plural_eval (plural_expr, nplurals_value, header, + &distribution); /* Check the number of plurals of the translations. */ if (!seen_errors) @@ -448,6 +489,17 @@ check_plural (message_list_ty *mlp) seen_errors++; } + /* distribution is not needed if we report errors. + Also, if there was an error due to max_nplurals > nplurals_value, + we must not use distribution because we would be doing out-of-bounds + array accesses. */ + if (seen_errors > 0 && distribution != NULL) + { + free (distribution); + distribution = NULL; + } + *plural_distribution = distribution; + return seen_errors; } @@ -472,7 +524,10 @@ formatstring_error_logger (const char *format, ...) } -/* Perform miscellaneous checks on a message. */ +/* Perform miscellaneous checks on a message. + PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements, + PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed + infinitely often by the plural formula. */ static int check_pair (const message_ty *mp, const char *msgid, @@ -481,7 +536,7 @@ check_pair (const message_ty *mp, const char *msgstr, size_t msgstr_len, const enum is_format is_format[NFORMATS], int check_newlines, - int check_format_strings, + int check_format_strings, const unsigned char *plural_distribution, int check_compatibility, int check_accelerators, char accelerator_char) { @@ -595,7 +650,8 @@ plural handling is a GNU gettext extension")); curr_msgid_pos = *msgid_pos; seen_errors += check_msgid_msgstr_format (msgid, msgid_plural, msgstr, msgstr_len, - is_format, formatstring_error_logger); + is_format, plural_distribution, + formatstring_error_logger); } if (check_accelerators && msgid_plural == NULL) @@ -717,12 +773,15 @@ some header fields still have the initial default value\n")); /* Perform all checks on a non-obsolete message. + PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements, + PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed + infinitely often by the plural formula. Return the number of errors that were seen. */ int check_message (const message_ty *mp, const lex_pos_ty *msgid_pos, int check_newlines, - int check_format_strings, + int check_format_strings, const unsigned char *plural_distribution, int check_header, int check_compatibility, int check_accelerators, char accelerator_char) @@ -734,7 +793,9 @@ check_message (const message_ty *mp, mp->msgid, msgid_pos, mp->msgid_plural, mp->msgstr, mp->msgstr_len, mp->is_format, - check_newlines, check_format_strings, check_compatibility, + check_newlines, + check_format_strings, plural_distribution, + check_compatibility, check_accelerators, accelerator_char); } @@ -750,10 +811,11 @@ check_message_list (message_list_ty *mlp, int check_accelerators, char accelerator_char) { int seen_errors = 0; + unsigned char *plural_distribution = NULL; size_t j; if (check_header) - seen_errors += check_plural (mlp); + seen_errors += check_plural (mlp, &plural_distribution); for (j = 0; j < mlp->nitems; j++) { @@ -761,7 +823,8 @@ check_message_list (message_list_ty *mlp, if (!mp->obsolete) seen_errors += check_message (mp, &mp->pos, - check_newlines, check_format_strings, + check_newlines, + check_format_strings, plural_distribution, check_header, check_compatibility, check_accelerators, accelerator_char); } diff --git a/gettext-tools/src/msgl-check.h b/gettext-tools/src/msgl-check.h index 612d33c77..acc7f65ee 100644 --- a/gettext-tools/src/msgl-check.h +++ b/gettext-tools/src/msgl-check.h @@ -29,11 +29,14 @@ extern "C" { /* Perform all checks on a non-obsolete message. + PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements, + PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed + infinitely often by the plural formula. Return the number of errors that were seen. */ extern int check_message (const message_ty *mp, const lex_pos_ty *msgid_pos, int check_newlines, - int check_format_strings, + int check_format_strings, const unsigned char *plural_distribution, int check_header, int check_compatibility, int check_accelerators, char accelerator_char); diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index b2b0d6350..b11453622 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2005-10-05 Bruno Haible + + * msgfmt-15: New file. + * Makefile.am (TESTS): Add it. + 2005-10-04 Bruno Haible * msgfmt-10: Change expected error message, to match new line numbers. diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 7bc98c5bf..d4f609c59 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -40,6 +40,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ msgfilter-1 msgfilter-2 msgfilter-3 msgfilter-4 \ msgfmt-1 msgfmt-2 msgfmt-3 msgfmt-4 msgfmt-5 msgfmt-6 msgfmt-7 \ msgfmt-8 msgfmt-9 msgfmt-10 msgfmt-11 msgfmt-12 msgfmt-13 msgfmt-14 \ + msgfmt-15 \ msgfmt-properties-1 \ msgfmt-qt-1 \ msggrep-1 msggrep-2 msggrep-3 msggrep-4 msggrep-5 msggrep-6 msggrep-7 \