From: Bruno Haible
Date: Mon, 27 Aug 2001 12:05:00 +0000 (+0000)
Subject: Python format string checking.
X-Git-Tag: v0.11~532
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=308605e3046d0700095258392743a7acc79bdf5b;p=thirdparty%2Fgettext.git

Python format string checking.
---

diff --git a/src/format-python.c b/src/format-python.c
new file mode 100644
index 000000000..6840f0394
--- /dev/null
+++ b/src/format-python.c
@@ -0,0 +1,625 @@
+/* Python format strings.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "format.h"
+#include "system.h"
+#include "error.h"
+#include "progname.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+/* Python format strings are described in
+     Python Library reference
+     2. Built-in Types, Exceptions and Functions
+     2.1. Built-in Types
+     2.1.5. Sequence Types
+     2.1.5.2. String Formatting Operations
+   Any string or Unicode string can act as format string via the '%' operator,
+   implemented in stringobject.c and unicodeobject.c.
+   A directive
+   - starts with '%'
+   - is optionally followed by '(ident)' where ident is any sequence of
+     characters with balanced left and right parentheses,
+   - is optionally followed by any of the characters '-' (left justification),
+     '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
+     flag,
+   - is optionally followed by a width specification: '*' (reads an argument)
+     or a nonempty digit sequence,
+   - is optionally followed by '.' and a precision specification: '*' (reads
+     an argument) or a nonempty digit sequence,
+   - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
+   - is finished by a specifier
+       - '%', that needs no argument,
+       - 'c', that needs a character argument,
+       - 's', 'r', that need a string argument,
+       - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
+       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
+   Use of '(ident)' and use of unnamed argument specifications are exclusive,
+   because the first requires a mapping as argument, while the second requires
+   a tuple as argument.
+ */
+
+enum format_arg_type
+{
+  FAT_NONE,
+  FAT_ANY,
+  FAT_CHARACTER,
+  FAT_STRING,
+  FAT_INTEGER,
+  FAT_FLOAT
+};
+
+struct named_arg
+{
+  char *name;
+  enum format_arg_type type;
+};
+
+struct unnamed_arg
+{
+  enum format_arg_type type;
+};
+
+struct spec
+{
+  unsigned int directives;
+  unsigned int named_arg_count;
+  unsigned int unnamed_arg_count;
+  unsigned int allocated;
+  struct named_arg *named;
+  struct unnamed_arg *unnamed;
+};
+
+/* Locale independent test for a decimal digit.
+   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
+   isdigit must be an 'unsigned char'.)  */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite of K&R C function definition syntax.  */
+static int named_arg_compare PARAMS ((const void *p1, const void *p2));
+static void *format_parse PARAMS ((const char *format));
+static void format_free PARAMS ((void *descr));
+static int format_get_number_of_directives PARAMS ((void *descr));
+static bool format_check PARAMS ((const lex_pos_ty *pos,
+                                  void *msgid_descr, void *msgstr_descr));
+
+
+/* Comparison function for qsort: orders 'struct named_arg' elements by name.  */
+static int
+named_arg_compare (p1, p2)
+     const void *p1;
+     const void *p2;
+{
+  return strcmp (((const struct named_arg *) p1)->name,
+                 ((const struct named_arg *) p2)->name);
+}
+
+/* Parse FORMAT as a Python format string.  Return a freshly allocated
+   'struct spec' listing its named or unnamed arguments, or NULL if FORMAT
+   is not a valid format string.  */
+static void *
+format_parse (format)
+     const char *format;
+{
+  struct spec spec;
+  struct spec *result;
+  char *name = NULL;  /* Name of the current directive, until stored in spec.  */
+
+  spec.directives = 0;
+  spec.named_arg_count = 0;
+  spec.unnamed_arg_count = 0;
+  spec.allocated = 0;
+  spec.named = NULL;
+  spec.unnamed = NULL;
+
+  for (; *format != '\0';)
+    if (*format++ == '%')
+      {
+        /* A directive.  */
+        enum format_arg_type type;
+
+        name = NULL;
+        spec.directives++;
+
+        if (*format == '(')
+          {
+            unsigned int depth;
+            const char *name_start;
+            const char *name_end;
+            size_t n;
+
+            name_start = ++format;
+            depth = 0;
+            for (; *format != '\0'; format++)
+              {
+                if (*format == '(')
+                  depth++;
+                else if (*format == ')')
+                  {
+                    if (depth == 0)
+                      break;
+                    else
+                      depth--;
+                  }
+              }
+            if (*format == '\0')
+              goto bad_format;
+            name_end = format++;
+
+            n = name_end - name_start;
+            name = (char *) xmalloc (n + 1);
+            memcpy (name, name_start, n);
+            name[n] = '\0';
+          }
+
+        while (*format == '-' || *format == '+' || *format == ' '
+               || *format == '#' || *format == '0')
+          format++;
+
+        if (*format == '*')
+          {
+            format++;
+
+            /* Named and unnamed specifications are exclusive.  */
+            if (spec.named_arg_count > 0)
+              goto bad_format;
+
+            if (spec.allocated == spec.unnamed_arg_count)
+              {
+                spec.allocated = 2 * spec.allocated + 1;
+                spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
+              }
+            spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
+            spec.unnamed_arg_count++;
+          }
+        else if (isdigit (*format))
+          {
+            do format++; while (isdigit (*format));
+          }
+
+        if (*format == '.')
+          {
+            format++;
+
+            if (*format == '*')
+              {
+                format++;
+
+                /* Named and unnamed specifications are exclusive.  */
+                if (spec.named_arg_count > 0)
+                  goto bad_format;
+
+                if (spec.allocated == spec.unnamed_arg_count)
+                  {
+                    spec.allocated = 2 * spec.allocated + 1;
+                    spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
+                  }
+                spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
+                spec.unnamed_arg_count++;
+              }
+            else if (isdigit (*format))
+              {
+                do format++; while (isdigit (*format));
+              }
+          }
+
+        if (*format == 'h' || *format == 'l' || *format == 'L')
+          format++;
+
+        switch (*format)
+          {
+          case '%':
+            type = FAT_ANY;
+            break;
+          case 'c':
+            type = FAT_CHARACTER;
+            break;
+          case 's': case 'r':
+            type = FAT_STRING;
+            break;
+          case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
+            type = FAT_INTEGER;
+            break;
+          case 'e': case 'E': case 'f': case 'g': case 'G':
+            type = FAT_FLOAT;
+            break;
+          default:
+            goto bad_format;
+          }
+
+        if (name != NULL)
+          {
+            /* Named argument.  */
+
+            /* Named and unnamed specifications are exclusive.  */
+            if (spec.unnamed_arg_count > 0)
+              goto bad_format;
+
+            if (spec.allocated == spec.named_arg_count)
+              {
+                spec.allocated = 2 * spec.allocated + 1;
+                spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
+              }
+            spec.named[spec.named_arg_count].name = name;
+            spec.named[spec.named_arg_count].type = type;
+            spec.named_arg_count++;
+            /* spec.named now owns the string; bad_format must not free it twice.  */
+            name = NULL;
+          }
+        else if (*format != '%')
+          {
+            /* Unnamed argument.  */
+
+            /* Named and unnamed specifications are exclusive.  */
+            if (spec.named_arg_count > 0)
+              goto bad_format;
+
+            if (spec.allocated == spec.unnamed_arg_count)
+              {
+                spec.allocated = 2 * spec.allocated + 1;
+                spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
+              }
+            spec.unnamed[spec.unnamed_arg_count].type = type;
+            spec.unnamed_arg_count++;
+          }
+
+        format++;
+      }
+
+  /* Sort the named argument array, and eliminate duplicates.  */
+  if (spec.named_arg_count > 1)
+    {
+      unsigned int i, j;
+      bool err;
+
+      qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
+             named_arg_compare);
+
+      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
+      err = false;
+      for (i = j = 0; i < spec.named_arg_count; i++)
+        if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
+          {
+            enum format_arg_type type1 = spec.named[i].type;
+            enum format_arg_type type2 = spec.named[j-1].type;
+            enum format_arg_type type_both;
+
+            if (type1 == type2 || type2 == FAT_ANY)
+              type_both = type1;
+            else if (type1 == FAT_ANY)
+              type_both = type2;
+            else
+              /* Incompatible types.  */
+              type_both = FAT_NONE, err = true;
+
+            spec.named[j-1].type = type_both;
+            free (spec.named[i].name);
+          }
+        else
+          {
+            if (j < i)
+              {
+                spec.named[j].name = spec.named[i].name;
+                spec.named[j].type = spec.named[i].type;
+              }
+            j++;
+          }
+      spec.named_arg_count = j;
+      if (err)
+        goto bad_format;
+    }
+
+  result = (struct spec *) xmalloc (sizeof (struct spec));
+  *result = spec;
+  return result;
+
+ bad_format:
+  /* Free a name that was parsed but not yet stored in spec.named.  */
+  if (name != NULL)
+    free (name);
+  if (spec.named != NULL)
+    {
+      unsigned int i;
+      for (i = 0; i < spec.named_arg_count; i++)
+        free (spec.named[i].name);
+      free (spec.named);
+    }
+  if (spec.unnamed != NULL)
+    free (spec.unnamed);
+  return NULL;
+}
+
+/* Free DESCR, a non-NULL descriptor returned by format_parse.  */
+static void
+format_free (descr)
+     void *descr;
+{
+  struct spec *spec = (struct spec *) descr;
+
+  if (spec->named != NULL)
+    {
+      unsigned int i;
+      for (i = 0; i < spec->named_arg_count; i++)
+        free (spec->named[i].name);
+      free (spec->named);
+    }
+  if (spec->unnamed != NULL)
+    free (spec->unnamed);
+  free (spec);
+}
+
+/* Return the number of directives in DESCR; '%%' directives are counted too.  */
+static int
+format_get_number_of_directives (descr)
+     void *descr;
+{
+  struct spec *spec = (struct spec *) descr;
+
+  return spec->directives;
+}
+
+/* Compare the argument lists of a msgid (MSGID_DESCR) and its msgstr
+   (MSGSTR_DESCR), reporting each mismatch at POS through error_at_line.
+   Return true if a mismatch was reported, false otherwise.  */
+static bool
+format_check (pos, msgid_descr, msgstr_descr)
+     const lex_pos_ty *pos;
+     void *msgid_descr;
+     void *msgstr_descr;
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
+    {
+      error_with_progname = false;
+      error_at_line (0, 0, pos->file_name, pos->line_number,
+                     _("format specifications in 'msgid' expect a mapping, those in 'msgstr' expect a tuple"));
+      error_with_progname = true;
+      err = true;
+    }
+  else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
+    {
+      error_with_progname = false;
+      error_at_line (0, 0, pos->file_name, pos->line_number,
+                     _("format specifications in 'msgid' expect a tuple, those in 'msgstr' expect a mapping"));
+      error_with_progname = true;
+      err = true;
+    }
+  else
+    {
+      if (spec1->named_arg_count + spec2->named_arg_count > 0)
+        {
+          unsigned int i;
+          unsigned int n = MAX (spec1->named_arg_count, spec2->named_arg_count);
+
+          /* Check the argument names are the same.
+             Both arrays are sorted.  We search for the first difference.  */
+          for (i = 0; i < n; i++)
+            {
+              int cmp = (i >= spec1->named_arg_count ? 1 :
+                         i >= spec2->named_arg_count ? -1 :
+                         strcmp (spec1->named[i].name, spec2->named[i].name));
+
+              if (cmp > 0)
+                {
+                  error_with_progname = false;
+                  error_at_line (0, 0, pos->file_name, pos->line_number,
+                                 _("a format specification for argument '%s' doesn't exist in 'msgid'"),
+                                 spec2->named[i].name);
+                  error_with_progname = true;
+                  err = true;
+                  break;
+                }
+              else if (cmp < 0)
+                {
+                  error_with_progname = false;
+                  error_at_line (0, 0, pos->file_name, pos->line_number,
+                                 _("a format specification for argument '%s' doesn't exist in 'msgstr'"),
+                                 spec1->named[i].name);
+                  error_with_progname = true;
+                  err = true;
+                  break;
+                }
+            }
+          /* Check the argument types are the same.  */
+          if (!err)
+            for (i = 0; i < spec2->named_arg_count; i++)
+              if (spec1->named[i].type != spec2->named[i].type)
+                {
+                  error_with_progname = false;
+                  error_at_line (0, 0, pos->file_name, pos->line_number,
+                                 _("format specifications in 'msgid' and 'msgstr' for argument '%s' are not the same"),
+                                 spec2->named[i].name);
+                  error_with_progname = true;
+                  err = true;
+                  break;
+                }
+        }
+
+      if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
+        {
+          unsigned int i;
+
+          /* Check the argument types are the same.  */
+          if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
+            {
+              error_with_progname = false;
+              error_at_line (0, 0, pos->file_name, pos->line_number,
+                             _("number of format specifications in 'msgid' and 'msgstr' does not match"));
+              error_with_progname = true;
+              err = true;
+            }
+          else
+            for (i = 0; i < spec1->unnamed_arg_count; i++)
+              if (spec1->unnamed[i].type != spec2->unnamed[i].type)
+                {
+                  error_with_progname = false;
+                  error_at_line (0, 0, pos->file_name, pos->line_number,
+                                 _("format specifications in 'msgid' and 'msgstr' for argument %u are not the same"),
+                                 i + 1);
+                  error_with_progname = true;
+                  err = true;
+                }
+        }
+    }
+
+  return err;
+}
+
+
+struct formatstring_parser formatstring_python =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+#include "getline.h"
+
+static void
+format_print (descr)
+     void *descr;
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int i;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  if (spec->named_arg_count > 0)
+    {
+      if (spec->unnamed_arg_count > 0)
+        abort ();
+
+      printf ("{");
+      for (i = 0; i < spec->named_arg_count; i++)
+        {
+          if (i > 0)
+            printf (", ");
+          printf ("'%s':", spec->named[i].name);
+          switch (spec->named[i].type)
+            {
+            case FAT_ANY:
+              printf ("*");
+              break;
+            case FAT_CHARACTER:
+              printf ("c");
+              break;
+            case FAT_STRING:
+              printf ("s");
+              break;
+            case FAT_INTEGER:
+              printf ("i");
+              break;
+            case FAT_FLOAT:
+              printf ("f");
+              break;
+            default:
+              abort ();
+            }
+        }
+      printf ("}");
+    }
+  else
+    {
+      printf ("(");
+      for (i = 0; i < spec->unnamed_arg_count; i++)
+        {
+          if (i > 0)
+            printf (" ");
+          switch (spec->unnamed[i].type)
+            {
+            case FAT_ANY:
+              printf ("*");
+              break;
+            case FAT_CHARACTER:
+              printf ("c");
+              break;
+            case FAT_STRING:
+              printf ("s");
+              break;
+            case FAT_INTEGER:
+              printf ("i");
+              break;
+            case FAT_FLOAT:
+              printf ("f");
+              break;
+            default:
+              abort ();
+            }
+        }
+      printf (")");
+    }
+}
+
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_len = 0;
+      void *descr;
+
+      if (getline (&line, &line_len, stdin) < 0)
+        break;
+
+      descr = format_parse (line);
+
+      format_print (descr);
+      printf ("\n");
+
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "gcc -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libnlsut.a"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/tests/format-python-1 b/tests/format-python-1
new file mode 100755
index 000000000..81b6a4886
--- /dev/null
+++ b/tests/format-python-1
@@ -0,0 +1,136 @@
+#! /bin/sh
+
+# Test recognition of Python format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-p-1.data"
+cat <<\EOF > f-p-1.data
+# Valid: no argument
+"abc%%"
+# Valid: one character argument
+"abc%c"
+# Valid: one string argument
+"abc%s"
+# Valid: one string argument
+"abc%r"
+# Valid: one integer argument
+"abc%i"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%u"
+# Valid: one integer argument
+"abc%o"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one floating-point argument
+"abc%e"
+# Valid: one floating-point argument
+"abc%E"
+# Valid: one floating-point argument
+"abc%f"
+# Valid: one floating-point argument
+"abc%g"
+# Valid: one floating-point argument
+"abc%G"
+# Valid: one argument with flags
+"abc%0#g"
+# Valid: one argument with width
+"abc%2g"
+# Valid: one argument with width
+"abc%*g"
+# Valid: one argument with precision
+"abc%.4g"
+# Valid: one argument with precision
+"abc%.*g"
+# Valid: one argument with width and precision
+"abc%14.4g"
+# Valid: one argument with width and precision
+"abc%14.*g"
+# Valid: one argument with width and precision
+"abc%*.4g"
+# Valid: one argument with width and precision
+"abc%*.*g"
+# Valid: one argument with size specifier
+"abc%hi"
+# Valid: one argument with size specifier
+"abc%li"
+# Valid: one argument with size specifier
+"abc%Li"
+# Invalid: unterminated
+"abc%"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: flags after width
+"abc%*0g"
+# Invalid: twice precision
+"abc%.4.2g"
+# Invalid: two size specifiers
+"abc%lli"
+# Valid: three arguments
+"abc%d%u%u"
+# Valid: a named argument
+"abc%(value)d"
+# Valid: an empty name
+"abc%()d"
+# Invalid: unterminated name
+"abc%(value"
+# Valid: ignored named argument
+"abc%(dummy)%"
+# Invalid: flags before name
+"abc%0(value)d"
+# Valid: three arguments, two with equal names
+"abc%(addr)4x,%(char)c,%(addr)u"
+# Invalid: argument with conflicting types
+"abc%(addr)4x,%(char)c,%(addr)s"
+# Valid: no conflict
+"abc%(addr)r,%(addr)s"
+# Invalid: mixing of named and unnamed arguments
+"abc%d%(addr)x"
+# Valid: named argument with constant precision
+"abc%(addr).9x"
+# Invalid: mixing of named and unnamed arguments
+"abc%(addr).*x"
+EOF
+
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  tmpfiles="$tmpfiles f-p-1-$n.in f-p-1-$n.po"
+  cat <<EOF > f-p-1-$n.in
+gettext(${string});
+EOF
+  ${XGETTEXT} -L Python -o f-p-1-$n.po f-p-1-$n.in || exit 1
+  test -f f-p-1-$n.po || exit 1
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if grep python-format f-p-1-$n.po > /dev/null; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    if grep python-format f-p-1-$n.po > /dev/null; then
+      fail=yes
+    else
+      :
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string recognition error:" 1>&2
+    cat f-p-1-$n.in 1>&2
+    echo "Got:" 1>&2
+    cat f-p-1-$n.po 1>&2
+    exit 1
+  fi
+done < f-p-1.data
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/tests/format-python-2 b/tests/format-python-2
new file mode 100755
index 000000000..81b33ac0b
--- /dev/null
+++ b/tests/format-python-2
@@ -0,0 +1,141 @@
+#! /bin/sh
+
+# Test checking of Python format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-p-2.data"
+cat <<\EOF > f-p-2.data
+# Valid: %% doesn't count
+msgid  "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid  "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments, with different widths
+msgid  "abc%2sdef"
+msgstr "xyz%3s"
+# Invalid: too few arguments
+msgid  "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid  "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same named arguments, with different widths
+msgid  "abc%(date)5s%(time)4s"
+msgstr "xyz%(date)4s%(time)5s"
+# Valid: permutation
+msgid  "abc%(3)d%(1)c%(2)sdef"
+msgstr "xyz%(2)s%(1)c%(3)d"
+# Invalid: missing argument
+msgid  "abc%(2)sdef%(1)u"
+msgstr "xyz%(1)u"
+# Invalid: missing argument
+msgid  "abc%(1)sdef%(2)u"
+msgstr "xyz%(2)u"
+# Invalid: added argument
+msgid  "abc%(foo)udef"
+msgstr "xyz%(foo)uvw%(char)c"
+# Invalid: added argument
+msgid  "abc%(foo)udef"
+msgstr "xyz%(foo)uvw%(zoo)c"
+# Invalid: unnamed vs. named arguments
+msgid  "abc%sdef"
+msgstr "xyz%(value)s"
+# Invalid: named vs. unnamed arguments
+msgid  "abc%(value)sdef"
+msgstr "xyz%s"
+# Valid: type compatibility
+msgid  "abc%s"
+msgstr "xyz%r"
+# Valid: type compatibility
+msgid  "abc%r"
+msgstr "xyz%s"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%f"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%G"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%i"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%i"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%i"
+msgstr "xyz%e"
+# Invalid: type incompatibility for width
+msgid  "abc%g%*g"
+msgstr "xyz%*g%g"
+EOF
+
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  tmpfiles="$tmpfiles f-p-2-$n.po f-p-2-$n.mo"
+  cat <<EOF > f-p-2-$n.po
+#, python-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if ${MSGFMT} -c -o f-p-2-$n.mo f-p-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    ${MSGFMT} -c -o f-p-2-$n.mo f-p-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat f-p-2-$n.po 1>&2
+    exit 1
+  fi
+done < f-p-2.data
+
+rm -fr $tmpfiles
+
+exit 0