From: Bruno Haible Date: Wed, 3 Sep 2003 10:21:42 +0000 (+0000) Subject: Handling of sh format strings. X-Git-Tag: v0.13~280 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1a0dc6f621f1497f4966980a34ea6f8c55b519b3;p=thirdparty%2Fgettext.git Handling of sh format strings. --- diff --git a/gettext-tools/ChangeLog b/gettext-tools/ChangeLog index 6ee75090a..1dc1b92ac 100644 --- a/gettext-tools/ChangeLog +++ b/gettext-tools/ChangeLog @@ -1,3 +1,7 @@ +2003-08-31 Bruno Haible + + * windows/gettextsrc.def: Add formatstring_sh. + 2003-08-29 Bruno Haible * Makefile.am (config.h.msvc): Define HAVE_DECL__SNPRINTF, diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index a1f86ad1a..bb0886234 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,17 @@ +2003-08-31 Bruno Haible + + * format-sh.c: New file. + * message.h (format_sh): New enum value. + (NFORMATS): Increment. + * format.h (formatstring_sh): New declaration. + * format.c (formatstring_parsers): Add entry for sh. + * message.c (format_language, format_language_pretty): Likewise. + * Makefile.am (FORMAT_SOURCE): Add format-sh.c. + * Makefile.msvc (OBJECTS): Add format-sh.obj. + (format-sh.obj): New rule. + * Makefile.vms (OBJECTS): Add format-sh.obj. + (format-sh.obj): New rule. + 2003-08-31 Bruno Haible * format-gcc-internal.c (isdigit): Remove unused macro. diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 7ad4999f9..bf1ac5ee0 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -91,9 +91,10 @@ read-properties.c open-po.c dir-list.c str-list.c # xgettext and msgfmt deal with format strings. 
FORMAT_SOURCE = format.c format-invalid.h \ -format-c.c format-python.c format-lisp.c format-elisp.c format-librep.c \ -format-java.c format-awk.c format-pascal.c format-ycp.c format-tcl.c \ -format-perl.c format-perl-brace.c format-php.c format-gcc-internal.c +format-c.c format-sh.c format-python.c format-lisp.c format-elisp.c \ +format-librep.c format-java.c format-awk.c format-pascal.c format-ycp.c \ +format-tcl.c format-perl.c format-perl-brace.c format-php.c \ +format-gcc-internal.c # libgettextsrc contains all code that is needed by at least two programs. libgettextsrc_la_SOURCES = \ diff --git a/gettext-tools/src/Makefile.msvc b/gettext-tools/src/Makefile.msvc index 5f6132775..42c0d8c2b 100644 --- a/gettext-tools/src/Makefile.msvc +++ b/gettext-tools/src/Makefile.msvc @@ -130,6 +130,7 @@ OBJECTS = \ plural-table.obj \ format.obj \ format-c.obj \ + format-sh.obj \ format-python.obj \ format-lisp.obj \ format-elisp.obj \ @@ -242,6 +243,9 @@ format.obj : format.c format-c.obj : format-c.c $(CC) $(INCLUDES) $(CFLAGS) $(PICFLAGS) -c format-c.c +format-sh.obj : format-sh.c + $(CC) $(INCLUDES) $(CFLAGS) $(PICFLAGS) -c format-sh.c + format-python.obj : format-python.c $(CC) $(INCLUDES) $(CFLAGS) $(PICFLAGS) -c format-python.c diff --git a/gettext-tools/src/Makefile.vms b/gettext-tools/src/Makefile.vms index d23c871ea..dbecb01be 100644 --- a/gettext-tools/src/Makefile.vms +++ b/gettext-tools/src/Makefile.vms @@ -76,6 +76,7 @@ OBJECTS = \ plural-table.obj, \ format.obj, \ format-c.obj, \ + format-sh.obj, \ format-python.obj, \ format-lisp.obj, \ format-elisp.obj, \ @@ -186,6 +187,9 @@ format.obj : format.c format-c.obj : format-c.c $(CC) $(INCLUDES) $(CFLAGS) /define=($(DEFS)) format-c.c +format-sh.obj : format-sh.c + $(CC) $(INCLUDES) $(CFLAGS) /define=($(DEFS)) format-sh.c + format-python.obj : format-python.c $(CC) $(INCLUDES) $(CFLAGS) /define=($(DEFS)) format-python.c diff --git a/gettext-tools/src/format-sh.c b/gettext-tools/src/format-sh.c new file mode 100644 
index 000000000..542e846bb --- /dev/null +++ b/gettext-tools/src/format-sh.c @@ -0,0 +1,383 @@ +/* Shell format strings. + Copyright (C) 2003 Free Software Foundation, Inc. + Written by Bruno Haible , 2003. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "format.h" +#include "c-ctype.h" +#include "xmalloc.h" +#include "format-invalid.h" +#include "error.h" +#include "error-progname.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* Shell format strings are simply strings subjects to variable substitution. + A variable substitution starts with '$' and is finished by either + - a nonempty sequence of alphanumeric ASCII characters, the first being + not a digit, or + - an opening brace '{', some other characters with balanced '{' and '}', + and a closing brace '}', or + - a single ASCII character, like '$' or '?'. + + FIXME: POSIX has more complicated rules for determining the matching brace: + "Any '}' escaped by a backslash or within a quoted string, and characters + in embedded arithmetic expansions, command substitutions, and variable + expansions, shall not be examined in determining the matching '}'." + Not yet implemented here. 
+ */ + +struct named_arg +{ + char *name; +}; + +struct spec +{ + unsigned int directives; + unsigned int named_arg_count; + unsigned int allocated; + struct named_arg *named; +}; + + +static int +named_arg_compare (const void *p1, const void *p2) +{ + return strcmp (((const struct named_arg *) p1)->name, + ((const struct named_arg *) p2)->name); +} + +#define INVALID_NON_ASCII_VARIABLE() \ + xstrdup (_("The string refers to a shell variable with a non-ASCII name.")) +#define INVALID_EMPTY_VARIABLE() \ + xstrdup (_("The string refers to a shell variable with an empty name.")) + +static void * +format_parse (const char *format, char **invalid_reason) +{ + struct spec spec; + struct spec *result; + + spec.directives = 0; + spec.named_arg_count = 0; + spec.allocated = 0; + spec.named = NULL; + + for (; *format != '\0';) + if (*format++ == '$') + { + /* A variable substitution. */ + char *name; + + spec.directives++; + + if (*format == '{') + { + unsigned int depth; + const char *name_start; + const char *name_end; + size_t n; + + name_start = ++format; + depth = 0; + for (; *format != '\0'; format++) + { + if (*format == '{') + depth++; + else if (*format == '}') + { + if (depth == 0) + break; + else + depth--; + } + if (!c_isascii (*format)) + { + *invalid_reason = INVALID_NON_ASCII_VARIABLE(); + goto bad_format; + } + } + if (*format == '\0') + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + goto bad_format; + } + name_end = format++; + + n = name_end - name_start; + if (n == 0) + { + *invalid_reason = INVALID_EMPTY_VARIABLE(); + goto bad_format; + } + name = (char *) xmalloc (n + 1); + memcpy (name, name_start, n); + name[n] = '\0'; + } + else if (c_isalpha (*format) || *format == '_') + { + const char *name_start; + const char *name_end; + size_t n; + + name_start = format; + do + format++; + while (*format != '\0' && (c_isalnum (*format) || *format == '_')); + name_end = format; + + n = name_end - name_start; + name = (char *) xmalloc (n + 1); + memcpy 
(name, name_start, n); + name[n] = '\0'; + } + else if (*format != '\0') + { + if (!c_isascii (*format)) + { + *invalid_reason = INVALID_NON_ASCII_VARIABLE(); + goto bad_format; + } + name = (char *) xmalloc (2); + name[0] = *format++; + name[1] = '\0'; + } + else + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + goto bad_format; + } + + /* Named argument. */ + if (spec.allocated == spec.named_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg)); + } + spec.named[spec.named_arg_count].name = name; + spec.named_arg_count++; + } + + /* Sort the named argument array, and eliminate duplicates. */ + if (spec.named_arg_count > 1) + { + unsigned int i, j; + + qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), + named_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ + for (i = j = 0; i < spec.named_arg_count; i++) + if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) + free (spec.named[i].name); + else + { + if (j < i) + spec.named[j].name = spec.named[i].name; + j++; + } + spec.named_arg_count = j; + } + + result = (struct spec *) xmalloc (sizeof (struct spec)); + *result = spec; + return result; + + bad_format: + if (spec.named != NULL) + { + unsigned int i; + for (i = 0; i < spec.named_arg_count; i++) + free (spec.named[i].name); + free (spec.named); + } + return NULL; +} + +static void +format_free (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + if (spec->named != NULL) + { + unsigned int i; + for (i = 0; i < spec->named_arg_count; i++) + free (spec->named[i].name); + free (spec->named); + } + free (spec); +} + +static int +format_get_number_of_directives (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (const lex_pos_ty *pos, void *msgid_descr, void *msgstr_descr, + bool equality, bool noisy, 
const char *pretty_msgstr) +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->named_arg_count + spec2->named_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->named_arg_count; + unsigned int n2 = spec2->named_arg_count; + + /* Check the argument names are the same. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? -1 : + strcmp (spec1->named[i].name, spec2->named[j].name)); + + if (cmp > 0) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"), + spec2->named[j].name, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument '%s' doesn't exist in '%s'"), + spec1->named[i].name, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_sh = +{ + format_parse, + format_free, + format_get_number_of_directives, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. 
*/ + +#include <stdio.h> +#include "getline.h" + +static void +format_print (void *descr) +{ + struct spec *spec = (struct spec *) descr; + unsigned int i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("{"); + for (i = 0; i < spec->named_arg_count; i++) + { + if (i > 0) + printf (", "); + printf ("'%s'", spec->named[i].name); + } + printf ("}"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_size = 0; + int line_len; + char *invalid_reason; + void *descr; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + invalid_reason = NULL; + descr = format_parse (line, &invalid_reason); + + format_print (descr); + printf ("\n"); + if (descr == NULL) + printf ("%s\n", invalid_reason); + + free (invalid_reason); + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-sh.c ../lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c index b89185b15..cfa57f7bf 100644 --- a/gettext-tools/src/format.c +++ b/gettext-tools/src/format.c @@ -27,6 +27,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = { /* format_c */ &formatstring_c, + /* format_sh */ &formatstring_sh, /* format_python */ &formatstring_python, /* format_lisp */ &formatstring_lisp, /* format_elisp */ &formatstring_elisp, diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h index 1d173800a..e29d09584 100644 --- a/gettext-tools/src/format.h +++ b/gettext-tools/src/format.h @@ -62,6 +62,7 @@ struct formatstring_parser /* Format string parsers, each defined in its own file. 
*/ extern DLL_VARIABLE struct formatstring_parser formatstring_c; +extern DLL_VARIABLE struct formatstring_parser formatstring_sh; extern DLL_VARIABLE struct formatstring_parser formatstring_python; extern DLL_VARIABLE struct formatstring_parser formatstring_lisp; extern DLL_VARIABLE struct formatstring_parser formatstring_elisp; diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index f8c32e964..516b85655 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -35,6 +35,7 @@ const char *const format_language[NFORMATS] = { /* format_c */ "c", + /* format_sh */ "sh", /* format_python */ "python", /* format_lisp */ "lisp", /* format_elisp */ "elisp", @@ -54,6 +55,7 @@ const char *const format_language[NFORMATS] = const char *const format_language_pretty[NFORMATS] = { /* format_c */ "C", + /* format_sh */ "Shell", /* format_python */ "Python", /* format_lisp */ "Lisp", /* format_elisp */ "Emacs Lisp", diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h index 20aae574f..46a2f2d3f 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -41,6 +41,7 @@ extern "C" { enum format_type { format_c, + format_sh, format_python, format_lisp, format_elisp, @@ -56,7 +57,7 @@ enum format_type format_php, format_gcc_internal }; -#define NFORMATS 15 /* Number of format_type enum values. */ +#define NFORMATS 16 /* Number of format_type enum values. */ extern DLL_VARIABLE const char *const format_language[NFORMATS]; extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS]; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index fd5b53d0c..431f18546 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,9 @@ +2003-08-31 Bruno Haible + + * format-sh-1: New file. + * format-sh-2: New file. + * Makefile.am (TESTS): Add them. + 2003-09-01 Bruno Haible * xgettext-26: Add explicit symbol import to 'use' command. 
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index e5ea98bc8..f15b4780a 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -64,6 +64,7 @@ TESTS = gettext-1 gettext-2 \ format-perl-1 format-perl-2 \ format-perl-brace-1 format-perl-brace-2 \ format-perl-mixed-1 format-perl-mixed-2 \ + format-sh-1 format-sh-2 \ format-tcl-1 format-tcl-2 \ format-ycp-1 format-ycp-2 \ plural-1 plural-2 \ diff --git a/gettext-tools/tests/format-sh-1 b/gettext-tools/tests/format-sh-1 new file mode 100755 index 000000000..d438eac10 --- /dev/null +++ b/gettext-tools/tests/format-sh-1 @@ -0,0 +1,75 @@ +#! /bin/sh + +# Test recognition of Shell format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-sh-1.data" +cat <<\EOF > f-sh-1.data +# Invalid: no argument +"abc" +# Valid: one argument +"abc$file" +# Valid: one argument +"abc$f_x" +# Valid: one argument +"abc$0" +# Valid: one argument +"abc$$" +# Valid: one argument +"abc${tmpdir-/tmp}" +# Invalid: unterminated +"abc$" +# Invalid: unterminated name +"abc${A" +# Invalid: non-ASCII character +"abc$ß" +# Invalid: non-ASCII character +"abc${ß}" +# Invalid: an empty name +"abc${}" +# Valid: three arguments +"abc$dir$file" +# Valid: three arguments, two with equal names +"abc$addr$char$addr" +EOF + +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + tmpfiles="$tmpfiles f-sh-1-$n.in f-sh-1-$n.po" + sed -e 's,\$,\\$,g' <<EOF > f-sh-1-$n.in +gettext ${string}; +EOF + ${XGETTEXT} -L Shell --from-code=ISO-8859-1 -o f-sh-1-$n.po f-sh-1-$n.in || exit 1 + test -f f-sh-1-$n.po || exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep sh-format f-sh-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep sh-format f-sh-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-sh-1-$n.in 1>&2 + echo "Got:" 
1>&2 + cat f-sh-1-$n.po 1>&2 + exit 1 + fi + rm -f f-sh-1-$n.in f-sh-1-$n.po +done < f-sh-1.data + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/format-sh-2 b/gettext-tools/tests/format-sh-2 new file mode 100755 index 000000000..e19cc39d0 --- /dev/null +++ b/gettext-tools/tests/format-sh-2 @@ -0,0 +1,64 @@ +#! /bin/sh + +# Test checking of Shell format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-sh-2.data" +cat <<\EOF > f-sh-2.data +# Invalid: invalid msgstr +msgid "abc$file" +msgstr "xyz$file$" +# Valid: same arguments, permutation +msgid "abc$file in $dir" +msgstr "xyz$dir o $file" +# Invalid: missing argument +msgid "abc$dir/$file" +msgstr "xyz$file" +# Invalid: added argument +msgid "abc$file" +msgstr "xyz$file in $dir" +# Invalid: different default value +msgid "abc${file-/tmpdir}" +msgstr "xyz$file" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; do + read msgid_line + read msgstr_line + n=`expr $n + 1` + tmpfiles="$tmpfiles f-sh-2-$n.po f-sh-2-$n.mo" + cat <<EOF > f-sh-2-$n.po +#, sh-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-sh-2-$n.mo f-sh-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-sh-2-$n.mo f-sh-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-sh-2-$n.po 1>&2 + exit 1 + fi + rm -f f-sh-2-$n.po f-sh-2-$n.mo +done < f-sh-2.data + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/windows/gettextsrc.def b/gettext-tools/windows/gettextsrc.def index 93486f5ad..b93979e8b 100644 --- a/gettext-tools/windows/gettextsrc.def +++ b/gettext-tools/windows/gettextsrc.def @@ -16,6 +16,7 @@ formatstring_perl formatstring_perl_brace formatstring_php formatstring_python +formatstring_sh formatstring_smalltalk formatstring_tcl formatstring_ycp