From: Bruno Haible Date: Mon, 13 Feb 2006 13:12:08 +0000 (+0000) Subject: Parser for Boost format strings. X-Git-Tag: v0.15~315 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=556f3b9c2018fffe76c8a36fcd5569ec7ebda06b;p=thirdparty%2Fgettext.git Parser for Boost format strings. --- diff --git a/gettext-tools/src/format-boost.c b/gettext-tools/src/format-boost.c new file mode 100644 index 000000000..ccf4f6101 --- /dev/null +++ b/gettext-tools/src/format-boost.c @@ -0,0 +1,743 @@ +/* Boost format strings. + Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. + Written by Bruno Haible , 2006. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include + +#include "format.h" +#include "c-ctype.h" +#include "xalloc.h" +#include "xerror.h" +#include "format-invalid.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* Boost format strings are described in + boost_1_33_1/libs/format/doc/format.html + and implemented in + boost_1_33_1/boost/format/parsing.hpp. + A directive (other than '%%') + - starts with '%' or '%|'; in the latter case it must end in '|', + - is continued either by + - 'm%' where m is a positive integer, starting with a nonzero digit; + in this case the directive must not have started with '%|'; or + - the following: + - optional: 'm$' where m is a positive integer, starting with a + nonzero digit, + - optional: any of the characters '#', '0', '-', ' ', '+', "'", + '_', '=', 'h', 'l', + - optional: a width specification: '*' (reads an argument) or '*m$' + or a nonempty digit sequence, + - optional: a '.' and a precision specification: '*' (reads an + argument) or '*m$' or a nonempty digit sequence, + - optional: any of the characters 'h', 'l', 'L', + - if the directive started with '%|': + an optional specifier and a final '|', + otherwise + a mandatory specifier. + If no specifier is given, it needs an argument of any type. + The possible specifiers are: + - 'c', 'C', that need a character argument, + - 's', 'S', that need an argument of any type, + - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument, + - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument, + - 'p', that needs a 'void *' argument, + - 't', that doesn't need an argument, + - 'TX', where X is any character, that doesn't need an argument, + - 'n', that needs a pointer to integer. + The Boost format string interpreter doesn't actually care about + the argument types, but we do, because it increases the likelihood + of detecting translator mistakes. + Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications + cannot be used in the same string. + */ + +enum format_arg_type +{ + FAT_NONE = 0, + /* Basic types */ + FAT_INTEGER = 1, + FAT_DOUBLE = 2, + FAT_CHAR = 3, + FAT_POINTER = 4, + FAT_ANY = 5 +}; + +struct numbered_arg +{ + unsigned int number; + enum format_arg_type type; +}; + +struct spec +{ + unsigned int directives; + unsigned int numbered_arg_count; + unsigned int allocated; + struct numbered_arg *numbered; +}; + +/* Locale independent test for a decimal digit. + Argument can be 'char' or 'unsigned char'. (Whereas the argument of + isdigit must be an 'unsigned char'.) */ +#undef isdigit +#define isdigit(c) ((unsigned int) ((c) - '0') < 10) + + +static int +numbered_arg_compare (const void *p1, const void *p2) +{ + unsigned int n1 = ((const struct numbered_arg *) p1)->number; + unsigned int n2 = ((const struct numbered_arg *) p2)->number; + + return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); +} + +static void * +format_parse (const char *format, bool translated, char **invalid_reason) +{ + struct spec spec; + unsigned int unnumbered_arg_count; + struct spec *result; + + spec.directives = 0; + spec.numbered_arg_count = 0; + spec.allocated = 0; + spec.numbered = NULL; + unnumbered_arg_count = 0; + + for (; *format != '\0';) + if (*format++ == '%') + { + /* A directive. */ + spec.directives++; + + if (*format == '%') + format++; + else + { + bool brackets = false; + bool done = false; + unsigned int number = 0; + enum format_arg_type type = FAT_NONE; + + if (*format == '|') + { + format++; + brackets = true; + } + + if (isdigit (*format) && *format != '0') + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if ((!brackets && *f == '%') || *f == '$') + { + if (m == 0) /* can happen if m overflows */ + { + *invalid_reason = INVALID_ARGNO_0 (spec.directives); + goto bad_format; + } + number = m; + if (*f == '%') + { + type = FAT_ANY; + done = true; + } + format = ++f; + } + } + + if (!done) + { + /* Parse flags. */ + for (;;) + { + if (*format == ' ' || *format == '+' || *format == '-' + || *format == '#' || *format == '0' || *format == '\'' + || *format == '_' || *format == '=' || *format == 'h' + || *format == 'l') + format++; + else + break; + } + + /* Parse width. */ + if (*format == '*') + { + unsigned int width_number = 0; + + format++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + { + *invalid_reason = + INVALID_WIDTH_ARGNO_0 (spec.directives); + goto bad_format; + } + width_number = m; + format = ++f; + } + } + + if (width_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are + exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = + INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = width_number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are + exclusive. */ + if (spec.numbered_arg_count > 0) + { + *invalid_reason = + INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + + /* Parse precision. */ + if (*format == '.') + { + format++; + + if (*format == '*') + { + unsigned int precision_number = 0; + + format++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + { + *invalid_reason = + INVALID_PRECISION_ARGNO_0 (spec.directives); + goto bad_format; + } + precision_number = m; + format = ++f; + } + } + + if (precision_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are + exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = + INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = precision_number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are + exclusive. */ + if (spec.numbered_arg_count > 0) + { + *invalid_reason = + INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + } + + /* Parse size. */ + for (;;) + { + if (*format == 'h' || *format == 'l' || *format == 'L') + format++; + else + break; + } + + switch (*format++) + { + case 'c': case 'C': + type = FAT_CHAR; + break; + case 's': case 'S': + type = FAT_ANY; + break; + case 'i': case 'd': case 'o': case 'u': case 'x': case 'X': + type = FAT_INTEGER; + break; + case 'e': case 'E': case 'f': case 'g': case 'G': + type = FAT_DOUBLE; + break; + case 'p': + type = FAT_POINTER; + break; + case 't': + type = FAT_NONE; + break; + case 'T': + if (*format == '\0') + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + goto bad_format; + } + format++; + type = FAT_NONE; + break; + case 'n': + type = FAT_NONE; + break; + case '|': + if (brackets) + { + --format; + type = FAT_ANY; + break; + } + /*FALLTHROUGH*/ + default: + --format; + *invalid_reason = + (*format == '\0' + ? INVALID_UNTERMINATED_DIRECTIVE () + : INVALID_CONVERSION_SPECIFIER (spec.directives, + *format)); + goto bad_format; + } + if (brackets) + { + if (*format != '|') + { + *invalid_reason = + (*format == '\0' + ? INVALID_UNTERMINATED_DIRECTIVE () + : xasprintf (_("The directive number %u starts with | but does not end with |."), + spec.directives)); + goto bad_format; + } + format++; + } + } + + if (type != FAT_NONE) + { + if (number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = number; + spec.numbered[spec.numbered_arg_count].type = type; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec.numbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + goto bad_format; + } + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = type; + unnumbered_arg_count++; + } + } + } + } + + /* Convert the unnumbered argument array to numbered arguments. */ + if (unnumbered_arg_count > 0) + spec.numbered_arg_count = unnumbered_arg_count; + /* Sort the numbered argument array, and eliminate duplicates. */ + else if (spec.numbered_arg_count > 1) + { + unsigned int i, j; + bool err; + + qsort (spec.numbered, spec.numbered_arg_count, + sizeof (struct numbered_arg), numbered_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ + err = false; + for (i = j = 0; i < spec.numbered_arg_count; i++) + if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) + { + enum format_arg_type type1 = spec.numbered[i].type; + enum format_arg_type type2 = spec.numbered[j-1].type; + enum format_arg_type type_both; + + if (type1 == type2 || type2 == FAT_ANY) + type_both = type1; + else if (type1 == FAT_ANY) + type_both = type2; + else + { + /* Incompatible types. */ + type_both = FAT_NONE; + if (!err) + *invalid_reason = + INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); + err = true; + } + + spec.numbered[j-1].type = type_both; + } + else + { + if (j < i) + { + spec.numbered[j].number = spec.numbered[i].number; + spec.numbered[j].type = spec.numbered[i].type; + } + j++; + } + spec.numbered_arg_count = j; + if (err) + /* *invalid_reason has already been set above. */ + goto bad_format; + } + + result = (struct spec *) xmalloc (sizeof (struct spec)); + *result = spec; + return result; + + bad_format: + if (spec.numbered != NULL) + free (spec.numbered); + return NULL; +} + +static void +format_free (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + if (spec->numbered != NULL) + free (spec->numbered); + free (spec); +} + +static int +format_get_number_of_directives (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (void *msgid_descr, void *msgstr_descr, bool equality, + formatstring_error_logger_t error_logger, + const char *pretty_msgstr) +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->numbered_arg_count; + unsigned int n2 = spec2->numbered_arg_count; + + /* Check the argument names are the same. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? -1 : + spec1->numbered[i].number > spec2->numbered[j].number ? 1 : + spec1->numbered[i].number < spec2->numbered[j].number ? -1 : + 0); + + if (cmp > 0) + { + if (error_logger) + error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), + spec2->numbered[j].number, pretty_msgstr); + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (error_logger) + error_logger (_("a format specification for argument %u doesn't exist in '%s'"), + spec1->numbered[i].number, pretty_msgstr); + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + /* Check the argument types are the same. */ + if (!err) + for (i = 0, j = 0; j < n2; ) + { + if (spec1->numbered[i].number == spec2->numbered[j].number) + { + if (spec1->numbered[i].type != spec2->numbered[j].type) + { + if (error_logger) + error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), + pretty_msgstr, spec2->numbered[j].number); + err = true; + break; + } + j++, i++; + } + else + i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_boost = +{ + format_parse, + format_free, + format_get_number_of_directives, + NULL, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. */ + +#include +#include "getline.h" + +static void +format_print (void *descr) +{ + struct spec *spec = (struct spec *) descr; + unsigned int last; + unsigned int i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("("); + last = 1; + for (i = 0; i < spec->numbered_arg_count; i++) + { + unsigned int number = spec->numbered[i].number; + + if (i > 0) + printf (" "); + if (number < last) + abort (); + for (; last < number; last++) + printf ("_ "); + switch (spec->numbered[i].type) + { + case FAT_INTEGER: + printf ("i"); + break; + case FAT_DOUBLE: + printf ("f"); + break; + case FAT_CHAR: + printf ("c"); + break; + case FAT_POINTER: + printf ("p"); + break; + case FAT_ANY: + printf ("*"); + break; + default: + abort (); + } + last = number + 1; + } + printf (")"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_size = 0; + int line_len; + char *invalid_reason; + void *descr; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + invalid_reason = NULL; + descr = format_parse (line, false, &invalid_reason); + + format_print (descr); + printf ("\n"); + if (descr == NULL) + printf ("%s\n", invalid_reason); + + free (invalid_reason); + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-boost.c ../lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ +