From: Bruno Haible Date: Thu, 7 Feb 2002 13:14:55 +0000 (+0000) Subject: New awk backend. X-Git-Tag: v0.11.1~86 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=332d33b34c195c34cafa11594a013d97093f08f0;p=thirdparty%2Fgettext.git New awk backend. --- diff --git a/doc/ChangeLog b/doc/ChangeLog index fc8c11bc2..b8f8079aa 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2002-01-27 Bruno Haible + + * gettext.texi (gawk): Update. + 2002-02-02 Bruno Haible * gettext.texi (Python): Update. diff --git a/doc/gettext.texi b/doc/gettext.texi index 600bfa822..6edc36dec 100644 --- a/doc/gettext.texi +++ b/doc/gettext.texi @@ -6943,7 +6943,7 @@ gawk 3.1 or newer @code{_"abc"} @item gettext/ngettext functions -@code{dcgettext} +@code{dcgettext}, missing @code{dcngettext} in gawk-3.1.0 @item textdomain @code{TEXTDOMAIN} variable @@ -6961,14 +6961,15 @@ automatic, but missing @code{setlocale (LC_MESSAGES, "")} in gawk-3.1.0 use @item Extractor -@code{gawk --gen-po} +@code{xgettext} @item Formatting with positions @code{printf "%2$d %1$d"} (GNU awk only) @item Portability On platforms without gettext, no translation. On non-GNU awks, you must -define @code{dcgettext} and @code{bindtextdomain} yourself. +define @code{dcgettext}, @code{dcngettext} and @code{bindtextdomain} +yourself. @item po-mode marking --- diff --git a/src/ChangeLog b/src/ChangeLog index 4742b46cd..ef6eb4ce1 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,22 @@ +2002-01-27 Bruno Haible + + * message.h (format_type): New enum value 'format_awk'. + (NFORMATS): Increment. + * message.c (format_language): Add format_awk entry. + (format_language_pretty): Likewise. + * format.h (formatstring_awk): New declaration. + * format-awk.c: New file. + * format.c (formatstring_parsers): Add formatstring_awk. + * x-awk.h: New file. + * x-awk.c: New file. + * xgettext.c: Include x-awk.h. + (main): Call x_awk_extract_all, x_awk_keyword. + (language_to_scanner): Add awk rule. 
+ (extension_to_language): Add awk rule. + * Makefile.am (noinst_HEADERS): Add x-awk.h. + (FORMAT_SOURCE): Add format-awk.c. + (xgettext_SOURCES): Add x-awk.c. + 2002-02-02 Bruno Haible * x-python.h: New file. diff --git a/src/Makefile.am b/src/Makefile.am index cffbd9ee9..1b5ed1fb2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -33,7 +33,7 @@ po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-equal.h msgl-iconv.h \ msgl-ascii.h msgl-cat.h msgl-english.h msgfmt.h msgunfmt.h read-mo.h \ write-mo.h read-java.h write-java.h po-time.h plural-table.h format.h \ xgettext.h x-c.h x-po.h x-python.h x-lisp.h x-elisp.h x-librep.h x-java.h \ -x-ycp.h x-rst.h +x-awk.h x-ycp.h x-rst.h EXTRA_DIST = FILES project-id \ gnu/gettext/DumpResource.java gnu/gettext/GetURL.java @@ -67,8 +67,8 @@ open-po.c dir-list.c str-list.c # xgettext and msgfmt deal with format strings. FORMAT_SOURCE = format.c \ -format-c.c format-java.c format-lisp.c format-elisp.c format-librep.c \ -format-python.c format-pascal.c format-ycp.c +format-c.c format-python.c format-lisp.c format-elisp.c format-librep.c \ +format-java.c format-awk.c format-pascal.c format-ycp.c # libgettextsrc contains all code that is needed by at least two programs. libgettextsrc_la_SOURCES = \ @@ -87,8 +87,8 @@ msgfmt_SOURCES = msgfmt.c write-mo.c write-java.c plural-eval.c msgmerge_SOURCES = msgmerge.c msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c xgettext_SOURCES = xgettext.c \ - x-c.c x-po.c x-python.c x-lisp.c x-elisp.c x-librep.c x-java.l x-ycp.c \ - x-rst.c + x-c.c x-po.c x-python.c x-lisp.c x-elisp.c x-librep.c x-java.l x-awk.c \ + x-ycp.c x-rst.c msgattrib_SOURCES = msgattrib.c msgcat_SOURCES = msgcat.c msgcomm_SOURCES = msgcomm.c diff --git a/src/format-awk.c b/src/format-awk.c new file mode 100644 index 000000000..add968fd8 --- /dev/null +++ b/src/format-awk.c @@ -0,0 +1,629 @@ +/* awk format strings. + Copyright (C) 2001-2002 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include + +#include "format.h" +#include "xmalloc.h" +#include "error.h" +#include "progname.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* awk format strings are described in the gawk-3.1 documentation and + implemented in gawk-3.1.0/builtin.c: format_tree(). + A directive + - starts with '%' or '%m$' where m is a positive integer, + - is optionally followed by any of the characters '#', '0', '-', ' ', '+', + each of which acts as a flag, + - is optionally followed by a width specification: '*' (reads an argument) + or '*m$' or a nonempty digit sequence, + - is optionally followed by '.' and a precision specification: '*' (reads + an argument) or '*m$' or a nonempty digit sequence, + - is finished by a specifier + - '%', that needs no argument, + - 'c', that needs a character argument, + - 's', that needs a string argument, + - 'i', 'd', that need a signed integer argument, + - 'o', 'u', 'x', 'X', that need an unsigned integer argument, + - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. + Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot + be used in the same string. 
+ */ + +enum format_arg_type +{ + FAT_NONE, + FAT_CHARACTER, + FAT_STRING, + FAT_INTEGER, + FAT_UNSIGNED_INTEGER, + FAT_FLOAT +}; + +struct numbered_arg +{ + unsigned int number; + enum format_arg_type type; +}; + +struct spec +{ + unsigned int directives; + unsigned int numbered_arg_count; + unsigned int allocated; + struct numbered_arg *numbered; +}; + +/* Locale independent test for a decimal digit. + Argument can be 'char' or 'unsigned char'. (Whereas the argument of + isdigit must be an 'unsigned char'.) */ +#undef isdigit +#define isdigit(c) ((unsigned int) ((c) - '0') < 10) + + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite of K&R C function definition syntax. */ +static int numbered_arg_compare PARAMS ((const void *p1, const void *p2)); +static void *format_parse PARAMS ((const char *format)); +static void format_free PARAMS ((void *descr)); +static int format_get_number_of_directives PARAMS ((void *descr)); +static bool format_check PARAMS ((const lex_pos_ty *pos, + void *msgid_descr, void *msgstr_descr, + bool equality, + bool noisy, const char *pretty_msgstr)); + + +static int +numbered_arg_compare (p1, p2) + const void *p1; + const void *p2; +{ + unsigned int n1 = ((const struct numbered_arg *) p1)->number; + unsigned int n2 = ((const struct numbered_arg *) p2)->number; + + return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); +} + +static void * +format_parse (format) + const char *format; +{ + struct spec spec; + unsigned int unnumbered_arg_count; + struct spec *result; + + spec.directives = 0; + spec.numbered_arg_count = 0; + spec.allocated = 0; + spec.numbered = NULL; + unnumbered_arg_count = 0; + + for (; *format != '\0';) + if (*format++ == '%') + { + /* A directive. 
*/ + unsigned int number = 0; + enum format_arg_type type; + + spec.directives++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + goto bad_format; + number = m; + format = ++f; + } + } + + /* Parse flags. */ + while (*format == ' ' || *format == '+' || *format == '-' + || *format == '#' || *format == '0') + format++; + + /* Parse width. */ + if (*format == '*') + { + unsigned int width_number = 0; + + format++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + goto bad_format; + width_number = m; + format = ++f; + } + } + + if (width_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = width_number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec.numbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + + /* Parse precision. 
*/ + if (*format == '.') + { + format++; + + if (*format == '*') + { + unsigned int precision_number = 0; + + format++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + goto bad_format; + precision_number = m; + format = ++f; + } + } + + if (precision_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = precision_number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. 
*/ + if (spec.numbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + } + + switch (*format) + { + case '%': + type = FAT_NONE; + break; + case 'c': + type = FAT_CHARACTER; + break; + case 's': + type = FAT_STRING; + break; + case 'i': case 'd': + type = FAT_INTEGER; + break; + case 'u': case 'o': case 'x': case 'X': + type = FAT_UNSIGNED_INTEGER; + break; + case 'e': case 'E': case 'f': case 'g': case 'G': + type = FAT_FLOAT; + break; + default: + goto bad_format; + } + + if (type != FAT_NONE) + { + if (number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = number; + spec.numbered[spec.numbered_arg_count].type = type; + spec.numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. 
*/ + if (spec.numbered_arg_count > 0) + goto bad_format; + + if (spec.allocated == unnumbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec.numbered[unnumbered_arg_count].type = type; + unnumbered_arg_count++; + } + } + + format++; + } + + /* Convert the unnumbered argument array to numbered arguments. */ + if (unnumbered_arg_count > 0) + spec.numbered_arg_count = unnumbered_arg_count; + /* Sort the numbered argument array, and eliminate duplicates. */ + else if (spec.numbered_arg_count > 1) + { + unsigned int i, j; + bool err; + + qsort (spec.numbered, spec.numbered_arg_count, + sizeof (struct numbered_arg), numbered_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ + err = false; + for (i = j = 0; i < spec.numbered_arg_count; i++) + if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) + { + enum format_arg_type type1 = spec.numbered[i].type; + enum format_arg_type type2 = spec.numbered[j-1].type; + enum format_arg_type type_both; + + if (type1 == type2) + type_both = type1; + else + /* Incompatible types. 
*/ + type_both = FAT_NONE, err = true; + + spec.numbered[j-1].type = type_both; + } + else + { + if (j < i) + { + spec.numbered[j].number = spec.numbered[i].number; + spec.numbered[j].type = spec.numbered[i].type; + } + j++; + } + spec.numbered_arg_count = j; + if (err) + goto bad_format; + } + + result = (struct spec *) xmalloc (sizeof (struct spec)); + *result = spec; + return result; + + bad_format: + if (spec.numbered != NULL) + free (spec.numbered); + return NULL; +} + +static void +format_free (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + + if (spec->numbered != NULL) + free (spec->numbered); + free (spec); +} + +static int +format_get_number_of_directives (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (pos, msgid_descr, msgstr_descr, equality, noisy, pretty_msgstr) + const lex_pos_ty *pos; + void *msgid_descr; + void *msgstr_descr; + bool equality; + bool noisy; + const char *pretty_msgstr; +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->numbered_arg_count; + unsigned int n2 = spec2->numbered_arg_count; + + /* Check the argument names are the same. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? -1 : + spec1->numbered[i].number > spec2->numbered[j].number ? 1 : + spec1->numbered[i].number < spec2->numbered[j].number ? 
-1 : + 0); + + if (cmp > 0) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), + spec2->numbered[j].number, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument %u doesn't exist in '%s'"), + spec1->numbered[i].number, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + /* Check the argument types are the same. */ + if (!err) + for (i = 0, j = 0; j < n2; ) + { + if (spec1->numbered[i].number == spec2->numbered[j].number) + { + if (spec1->numbered[i].type != spec2->numbered[j].type) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("format specifications in 'msgid' and '%s' for argument %u are not the same"), + pretty_msgstr, + spec2->numbered[j].number); + error_with_progname = true; + } + err = true; + break; + } + j++, i++; + } + else + i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_awk = +{ + format_parse, + format_free, + format_get_number_of_directives, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. 
*/ + +#include +#include "getline.h" + +static void +format_print (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + unsigned int last; + unsigned int i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("("); + last = 1; + for (i = 0; i < spec->numbered_arg_count; i++) + { + unsigned int number = spec->numbered[i].number; + + if (i > 0) + printf (" "); + if (number < last) + abort (); + for (; last < number; last++) + printf ("_ "); + switch (spec->numbered[i].type) + { + case FAT_CHARACTER: + printf ("c"); + break; + case FAT_STRING: + printf ("s"); + break; + case FAT_INTEGER: + printf ("i"); + break; + case FAT_UNSIGNED_INTEGER: + printf ("[unsigned]i"); + break; + case FAT_FLOAT: + printf ("f"); + break; + default: + abort (); + } + last = number + 1; + } + printf (")"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_len = 0; + void *descr; + + if (getline (&line, &line_len, stdin) < 0) + break; + + descr = format_parse (line); + + format_print (descr); + printf ("\n"); + + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. 
-I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ diff --git a/src/format.c b/src/format.c index e2c2fea68..6b4346059 100644 --- a/src/format.c +++ b/src/format.c @@ -33,6 +33,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = /* format_librep */ &formatstring_librep, /* format_smalltalk */ &formatstring_smalltalk, /* format_java */ &formatstring_java, + /* format_awk */ &formatstring_awk, /* format_pascal */ &formatstring_pascal, /* format_ycp */ &formatstring_ycp }; diff --git a/src/format.h b/src/format.h index b50d052ae..1df1c14a6 100644 --- a/src/format.h +++ b/src/format.h @@ -61,6 +61,7 @@ extern struct formatstring_parser formatstring_elisp; extern struct formatstring_parser formatstring_librep; extern struct formatstring_parser formatstring_smalltalk; extern struct formatstring_parser formatstring_java; +extern struct formatstring_parser formatstring_awk; extern struct formatstring_parser formatstring_pascal; extern struct formatstring_parser formatstring_ycp; diff --git a/src/message.c b/src/message.c index 130821fd0..84ac2d005 100644 --- a/src/message.c +++ b/src/message.c @@ -47,6 +47,7 @@ const char *const format_language[NFORMATS] = /* format_librep */ "librep", /* format_smalltalk */ "smalltalk", /* format_java */ "java", + /* format_awk */ "awk", /* format_pascal */ "object-pascal", /* format_ycp */ "ycp" }; @@ -60,6 +61,7 @@ const char *const format_language_pretty[NFORMATS] = /* format_librep */ "librep", /* format_smalltalk */ "Smalltalk", /* format_java */ "Java", + /* format_awk */ "awk", /* format_pascal */ "Object Pascal", /* format_ycp */ "YCP" }; diff --git a/src/message.h b/src/message.h index 1cd8d0085..aad981b7e 100644 --- a/src/message.h +++ b/src/message.h @@ -41,10 +41,11 @@ enum format_type format_librep, format_smalltalk, format_java, + format_awk, format_pascal, format_ycp }; -#define NFORMATS 9 /* Number of format_type enum values. 
*/ +#define NFORMATS 10 /* Number of format_type enum values. */ extern const char *const format_language[NFORMATS]; extern const char *const format_language_pretty[NFORMATS]; diff --git a/src/x-awk.c b/src/x-awk.c new file mode 100644 index 000000000..8915e5310 --- /dev/null +++ b/src/x-awk.c @@ -0,0 +1,866 @@ +/* xgettext awk backend. + Copyright (C) 2002 Free Software Foundation, Inc. + + This file was written by Bruno Haible , 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "message.h" +#include "x-awk.h" +#include "xgettext.h" +#include "error.h" +#include "progname.h" +#include "xmalloc.h" +#include "exit.h" +#include "gettext.h" + +#define _(s) gettext(s) + +#if HAVE_C_BACKSLASH_A +# define ALERT_CHAR '\a' +#else +# define ALERT_CHAR '\7' +#endif + + +/* The awk syntax is defined in the gawk manual page and documentation. + See also gawk/awkgram.y. */ + +enum token_type_ty +{ + token_type_eof, + token_type_lparen, /* ( */ + token_type_rparen, /* ) */ + token_type_comma, /* , */ + token_type_string, /* "abc" */ + token_type_i18nstring, /* _"abc" */ + token_type_symbol, /* symbol, number */ + token_type_other /* regexp, misc. 
operator */ +}; +typedef enum token_type_ty token_type_ty; + +typedef struct token_ty token_ty; +struct token_ty +{ + token_type_ty type; + char *string; /* for token_type_{symbol,string,i18nstring} */ + int line_number; +}; + + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite of K&R C function definition syntax. */ +static void init_keywords PARAMS ((void)); +static int phase1_getc PARAMS ((void)); +static void phase1_ungetc PARAMS ((int c)); +static int phase2_getc PARAMS ((void)); +static void phase2_ungetc PARAMS ((int c)); +static int phase7_getc PARAMS ((void)); +static inline void free_token PARAMS ((token_ty *tp)); +static void x_awk_lex PARAMS ((token_ty *tp)); +static bool extract_parenthesized PARAMS ((message_list_ty *mlp, + int commas_to_skip, + int plural_commas)); + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + + +void +x_awk_extract_all () +{ + extract_all = true; +} + + +void +x_awk_keyword (name) + const char *name; +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + int argnum1; + int argnum2; + const char *colon; + + if (keywords.table == NULL) + init_hash (&keywords, 100); + + split_keywordspec (name, &end, &argnum1, &argnum2); + + /* The characters between name and end should form a valid C identifier. + A colon means an invalid parse in split_keywordspec(). */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + { + if (argnum1 == 0) + argnum1 = 1; + insert_entry (&keywords, name, end - name, + (void *) (long) (argnum1 + (argnum2 << 10))); + } + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. 
*/ +static void +init_keywords () +{ + if (default_keywords) + { + x_awk_keyword ("dcgettext"); + x_awk_keyword ("dcngettext:1,2"); + default_keywords = false; + } +} + + +/* ================== Reading of characters and tokens. =================== */ + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* 1. line_number handling. */ + +static int +phase1_getc () +{ + int c = getc (fp); + + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("error while reading \"%s\""), + real_file_name); + return EOF; + } + + if (c == '\n') + line_number++; + + return c; +} + +static void +phase1_ungetc (c) + int c; +{ + if (c != EOF) + { + if (c == '\n') + --line_number; + + ungetc (c, fp); + } +} + + +/* 2. Replace each comment that is not inside a string literal or regular + expression with a newline character. We need to remember the comment + for later, because it may be attached to a keyword string. 
*/ + +static int +phase2_getc () +{ + static char *buffer; + static size_t bufmax; + size_t buflen; + int lineno; + int c; + + c = phase1_getc (); + if (c == '#') + { + buflen = 0; + lineno = line_number; + for (;;) + { + c = phase1_getc (); + if (c == '\n' || c == EOF) + break; + if (buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen++] = c; + } + if (buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen] = '\0'; + xgettext_comment_add (buffer); + last_comment_line = lineno; + } + return c; +} + +static void +phase2_ungetc (c) + int c; +{ + if (c != EOF) + phase1_ungetc (c); +} + + +/* 7. Replace escape sequences within character strings with their + single character equivalents. */ + +#define P7_QUOTES (1000 + '"') + +static int +phase7_getc () +{ + int c; + + for (;;) + { + /* Use phase 1, because phase 2 elides comments. */ + c = phase1_getc (); + + if (c == EOF || c == '\n') + break; + if (c == '"') + return P7_QUOTES; + if (c != '\\') + return c; + c = phase1_getc (); + if (c == EOF) + break; + if (c != '\n') + switch (c) + { + case 'a': + return ALERT_CHAR; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': + { + int n = c - '0'; + + c = phase1_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + { + n = (n << 3) + (c - '0'); + c = phase1_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + n = (n << 3) + (c - '0'); + else + phase1_ungetc (c); + } + } + else + phase1_ungetc (c); + } + return (unsigned char) n; + } + case 'x': + { + int n = 0; + + for (;;) + { + c = phase1_getc (); + if (c == EOF) + break; + else if (c >= '0' && c <= '9') + n = (n << 4) + (c - '0'); + else if (c >= 'A' && c <= 'F') + n = (n << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + n = 
(n << 4) + (c - 'a' + 10); + else + { + phase1_ungetc (c); + break; + } + } + return (unsigned char) n; + } + default: + return c; + } + } + + phase1_ungetc (c); + error_with_progname = false; + error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name, + line_number); + error_with_progname = true; + return P7_QUOTES; +} + + +/* Free the memory pointed to by a 'struct token_ty'. */ +static inline void +free_token (tp) + token_ty *tp; +{ + switch (tp->type) + { + case token_type_string: + case token_type_i18nstring: + case token_type_symbol: + free (tp->string); + break; + default: + break; + } +} + + +/* Combine characters into tokens. Discard whitespace. */ + +/* There is an ambiguity about '/': It can start a division operator ('/' or + '/=') or it can start a regular expression. The distinction is important + because inside regular expressions, '#' and '"' lose its special meanings. + If you look at the awk grammar, you see that the operator is only allowed + right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals + can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals. + So we prefer the division operator interpretation only right after + symbol, string, number, ')', ']', with whitespace but no newline allowed + in between. */ +static bool prefer_division_over_regexp; + +static void +x_awk_lex (tp) + token_ty *tp; +{ + static char *buffer; + static int bufmax; + int bufpos; + int c; + + for (;;) + { + tp->line_number = line_number; + c = phase2_getc (); + + switch (c) + { + case EOF: + tp->type = token_type_eof; + return; + + case '\n': + if (last_non_comment_line > last_comment_line) + xgettext_comment_reset (); + /* Newline is not allowed inside expressions. It usually + introduces a fresh statement. */ + prefer_division_over_regexp = false; + /* FALLTHROUGH */ + case '\t': + case ' ': + /* Ignore whitespace and comments. 
*/ + continue; + + case '\\': + /* Backslash ought to be immediately followed by a newline. */ + continue; + } + + last_non_comment_line = tp->line_number; + + switch (c) + { + case '.': + { + int c2 = phase2_getc (); + phase2_ungetc (c2); + if (!(c2 >= '0' && c2 <= '9')) + { + + tp->type = token_type_other; + prefer_division_over_regexp = false; + return; + } + } + /* FALLTHROUGH */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Symbol, or part of a number. 
*/ + bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + c = phase2_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + default: + if (bufpos == 1 && buffer[0] == '_' && c == '"') + { + tp->type = token_type_i18nstring; + goto case_string; + } + phase2_ungetc (c); + break; + } + break; + } + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + tp->string = xstrdup (buffer); + tp->type = token_type_symbol; + /* Most identifiers can be variable names; after them we must + interpret '/' as division operator. But for awk's builtin + keywords we have three cases: + (a) Must interpret '/' as division operator. "length". + (b) Must interpret '/' as start of a regular expression. + "do", "exit", "print", "printf", "return". + (c) '/' after this keyword in invalid anyway. All others. + I used the following script for the distinction. 
+ for k in $awk_keywords; do + echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null + done + */ + if (strcmp (buffer, "do") == 0 + || strcmp (buffer, "exit") == 0 + || strcmp (buffer, "print") == 0 + || strcmp (buffer, "printf") == 0 + || strcmp (buffer, "return") == 0) + prefer_division_over_regexp = false; + else + prefer_division_over_regexp = true; + return; + + case '"': + tp->type = token_type_string; + case_string: + bufpos = 0; + for (;;) + { + c = phase7_getc (); + if (c == EOF || c == P7_QUOTES) + break; + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + } + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + tp->string = xstrdup (buffer); + prefer_division_over_regexp = true; + return; + + case '(': + tp->type = token_type_lparen; + prefer_division_over_regexp = false; + return; + + case ')': + tp->type = token_type_rparen; + prefer_division_over_regexp = true; + return; + + case ',': + tp->type = token_type_comma; + prefer_division_over_regexp = false; + return; + + case ']': + tp->type = token_type_other; + prefer_division_over_regexp = true; + return; + + case '/': + if (!prefer_division_over_regexp) + { + /* Regular expression. + Counting brackets is non-trivial. [[] is balanced, and so is + [\]]. Also, /[/]/ is balanced and ends at the third slash. + Do not count [ or ] if either one is preceded by a \. + A '[' should be counted if + a) it is the first one so far (brackets == 0), or + b) it is the '[' in '[:'. + A ']' should be counted if not preceded by a \. + According to POSIX, []] is how you put a ] into a set. + Try to handle that too. 
*/ + int brackets = 0; + bool pos0 = true; /* true at start of regexp */ + bool pos1_open = false; /* true after [ at start of regexp */ + bool pos2_open_not = false; /* true after [^ at start of regexp */ + + for (;;) + { + c = phase1_getc (); + + if (c == EOF || c == '\n') + { + phase1_ungetc (c); + error_with_progname = false; + error (0, 0, _("%s:%d: warning: unterminated regular expression"), + logical_file_name, line_number); + error_with_progname = true; + break; + } + else if (c == '[') + { + if (brackets == 0) + brackets++; + else + { + c = phase1_getc (); + if (c == ':') + brackets++; + phase1_ungetc (c); + } + if (pos0) + { + pos0 = false; + pos1_open = true; + continue; + } + } + else if (c == ']') + { + if (!(pos1_open || pos2_open_not)) + brackets--; + } + else if (c == '^') + { + if (pos1_open) + { + pos1_open = false; + pos2_open_not = true; + continue; + } + } + else if (c == '\\') + { + c = phase1_getc (); + /* Backslash-newline is valid and ignored. */ + } + else if (c == '/') + { + if (brackets <= 0) + break; + } + + pos0 = false; + pos1_open = false; + pos2_open_not = false; + } + + tp->type = token_type_other; + prefer_division_over_regexp = false; + return; + } + /* FALLTHROUGH */ + + default: + /* We could carefully recognize each of the 2 and 3 character + operators, but it is not necessary, as we only need to recognize + gettext invocations. Don't bother. */ + tp->type = token_type_other; + prefer_division_over_regexp = false; + return; + } + } +} + + +/* ========================= Extracting strings. ========================== */ + +/* The file is broken into tokens. Scan the token stream, looking for + a keyword, followed by a left paren, followed by a string. When we + see this sequence, we have something to remember. We assume we are + looking at a valid awk program, and leave the complaints about + the grammar to the awk interpreter. + + Normal handling: Look for + keyword ( ... msgid ... ) + Plural handling: Look for + keyword ( ... 
msgid ... msgid_plural ... ) + + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ + + +/* Extract messages until the next balanced closing parenthesis. + Extracted messages are added to MLP. + When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and, + if also a plural argument shall be extracted, PLURAL_COMMAS > 0, + otherwise PLURAL_COMMAS = 0. + When no specific argument shall be extracted, COMMAS_TO_SKIP < 0. + COMMAS_TO_SKIP counts the commas still to be seen before the msgid + argument; PLURAL_COMMAS counts the further commas between the msgid + and the msgid_plural argument. + Tokens of type token_type_i18nstring are extracted regardless of these + counters. + Return true upon eof, false upon closing parenthesis. */ +static bool +extract_parenthesized (mlp, commas_to_skip, plural_commas) + message_list_ty *mlp; + int commas_to_skip; + int plural_commas; +{ + /* Remember the message containing the msgid, for msgid_plural. */ + message_ty *plural_mp = NULL; + + /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ + int state; + /* Parameters of the keyword just seen. Defined only in state 1. */ + int next_commas_to_skip = -1; + int next_plural_commas = 0; + + /* Start state is 0. */ + state = 0; + + while (1) + { + token_ty token; + + x_awk_lex (&token); + switch (token.type) + { + case token_type_symbol: + /* No need to bother if we extract all strings anyway. */ + if (!extract_all) + { + void *keyword_value; + + if (find_entry (&keywords, token.string, strlen (token.string), + &keyword_value) + == 0) + { /* The table value packs the first argument number into the low 10 bits and the second argument number into the bits above; both are converted into comma counts for the recursive call. */ + int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); + int argnum2 = (int) (long) keyword_value >> 10; + + next_commas_to_skip = argnum1 - 1; + next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0); + state = 1; + } + else + state = 0; + } + free (token.string); + continue; + + case token_type_lparen: + /* No need to recurse if we extract all strings anyway. */ + if (extract_all) + continue; /* In state 1 this parenthesis directly follows a keyword, so the keyword's argument positions are passed down; otherwise the group is scanned without a target argument. A true result means eof was reached inside the group. */ + if (state + ? 
extract_parenthesized (mlp, next_commas_to_skip, + next_plural_commas) + : extract_parenthesized (mlp, -1, 0)) + return true; + state = 0; + continue; + + case token_type_rparen: + /* No need to return if we extract all strings anyway. */ + if (extract_all) + continue; + return false; + + case token_type_comma: + /* No need to bother if we extract all strings anyway. */ + if (extract_all) + continue; + if (commas_to_skip >= 0) + { /* A specific argument is still being tracked: count down to it. */ + if (commas_to_skip > 0) + commas_to_skip--; + else /* This comma ends the argument position that was being extracted. */ + if (plural_mp != NULL && plural_commas > 0) + { /* The msgid was recorded and a plural is expected: now count commas up to the msgid_plural argument. */ + commas_to_skip = plural_commas - 1; + plural_commas = 0; + } + else /* Nothing more to extract from this argument list. */ + commas_to_skip = -1; + } + state = 0; + continue; + + case token_type_string: + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + + if (extract_all) + remember_a_message (mlp, token.string, &pos); + else + { + if (commas_to_skip == 0) + { + if (plural_mp == NULL) + { + /* Seen an msgid. */ + message_ty *mp = remember_a_message (mlp, token.string, + &pos); + if (plural_commas > 0) + plural_mp = mp; + } + else + { + /* Seen an msgid_plural. 
*/ + remember_a_message_plural (plural_mp, token.string, + &pos); + plural_mp = NULL; + } + } + else + free (token.string); + state = 0; + } + continue; + } + + case token_type_i18nstring: + { /* Recorded unconditionally: neither extract_all nor commas_to_skip is consulted for this token type. */ + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + + remember_a_message (mlp, token.string, &pos); + } + state = 0; + continue; + + case token_type_eof: + return true; + + case token_type_other: + state = 0; + continue; + + default: + abort (); + } + } +} + + +/* Scan the awk source read from F and add its translatable strings to the first message list of MDLP. REAL_FILENAME and a copy of LOGICAL_FILENAME are stored in the scanner's state variables for the duration of the scan; all of them are reset before returning. */ void +extract_awk (f, real_filename, logical_filename, mdlp) + FILE *f; + const char *real_filename; + const char *logical_filename; + msgdomain_list_ty *mdlp; +{ + message_list_ty *mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + /* At the start of the input a slash is taken as the start of a regular expression, not as a division operator. */ prefer_division_over_regexp = false; + + init_keywords (); + + /* Eat tokens until eof is seen. When extract_parenthesized returns + due to an unbalanced closing parenthesis, just restart it. */ + while (!extract_parenthesized (mlp, -1, 0)) + ; + + /* NOTE(review): the xstrdup'ed logical file name is dropped without free; presumably recorded positions may still point to it -- confirm against remember_a_message. */ fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} diff --git a/src/x-awk.h b/src/x-awk.h new file mode 100644 index 000000000..0b10b0fb5 --- /dev/null +++ b/src/x-awk.h @@ -0,0 +1,32 @@ +/* xgettext awk backend. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#define EXTENSIONS_AWK \ + { "awk", "awk" }, \ + +#define SCANNERS_AWK \ + { "awk", extract_awk, &formatstring_awk }, \ + +/* Scan an awk file and add its translatable strings to mdlp. */ +extern void extract_awk PARAMS ((FILE *fp, const char *real_filename, + const char *logical_filename, + msgdomain_list_ty *mdlp)); + +extern void x_awk_keyword PARAMS ((const char *keyword)); +extern void x_awk_extract_all PARAMS ((void)); diff --git a/src/xgettext.c b/src/xgettext.c index d288a30c8..11f174ddd 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -64,6 +64,7 @@ #include "x-elisp.h" #include "x-librep.h" #include "x-java.h" +#include "x-awk.h" #include "x-ycp.h" #include "x-rst.h" @@ -229,6 +230,7 @@ main (argc, argv) x_elisp_extract_all (); x_librep_extract_all (); x_java_extract_all (); + x_awk_extract_all (); break; case 'c': if (optarg == NULL) @@ -284,6 +286,7 @@ main (argc, argv) x_elisp_keyword (optarg); x_librep_keyword (optarg); x_java_keyword (optarg); + x_awk_keyword (optarg); } break; case 'K': @@ -1252,10 +1255,11 @@ language_to_extractor (name) SCANNERS_ELISP SCANNERS_LIBREP SCANNERS_JAVA + SCANNERS_AWK SCANNERS_YCP SCANNERS_RST - /* Here will follow more languages and their scanners: awk, perl, - etc... Make sure new scanners honor the --exclude-file option. */ + /* Here will follow more languages and their scanners: perl, etc... + Make sure new scanners honor the --exclude-file option. */ }; table_ty *tp; @@ -1295,6 +1299,7 @@ extension_to_language (extension) EXTENSIONS_ELISP EXTENSIONS_LIBREP EXTENSIONS_JAVA + EXTENSIONS_AWK EXTENSIONS_YCP EXTENSIONS_RST /* Here will follow more file extensions: sh, pl, tcl ... 
*/ diff --git a/tests/ChangeLog b/tests/ChangeLog index d58af35fe..762397c17 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,10 @@ +2002-01-27 Bruno Haible + + * format-awk-1: New file. + * format-awk-2: New file. + * lang-gawk: New file. + * Makefile.am (TESTS): Add format-awk-1, format-awk-2, lang-gawk. + 2002-02-02 Bruno Haible * xgettext-18: New file. diff --git a/tests/Makefile.am b/tests/Makefile.am index e7e7e335c..64a40284d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -46,6 +46,7 @@ TESTS = gettext-1 gettext-2 \ xgettext-7 xgettext-8 xgettext-9 xgettext-10 xgettext-11 xgettext-12 \ xgettext-13 xgettext-14 xgettext-15 xgettext-16 xgettext-17 \ xgettext-18 \ + format-awk-1 format-awk-2 \ format-c-1 format-c-2 \ format-elisp-1 format-elisp-2 \ format-java-1 format-java-2 \ @@ -55,7 +56,7 @@ TESTS = gettext-1 gettext-2 \ format-pascal-1 format-pascal-2 \ format-ycp-1 format-ycp-2 \ plural-1 plural-2 \ - lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-pascal lang-ycp lang-po lang-rst \ + lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-gawk lang-pascal lang-ycp lang-po lang-rst \ rpath-1a rpath-1b \ rpath-2aaa rpath-2aab rpath-2aac rpath-2aad \ rpath-2aba rpath-2abb rpath-2abc rpath-2abd \ diff --git a/tests/format-awk-1 b/tests/format-awk-1 new file mode 100755 index 000000000..e5a2b3a01 --- /dev/null +++ b/tests/format-awk-1 @@ -0,0 +1,140 @@ +#! /bin/sh + +# Test recognition of awk format strings. 
+ +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-a-1.data" +cat <<\EOF > f-a-1.data +# Valid: no argument +"abc%%" +# Valid: one character argument +"abc%c" +# Valid: one string argument +"abc%s" +# Valid: one integer argument +"abc%i" +# Valid: one integer argument +"abc%d" +# Valid: one integer argument +"abc%o" +# Valid: one integer argument +"abc%u" +# Valid: one integer argument +"abc%x" +# Valid: one integer argument +"abc%X" +# Valid: one floating-point argument +"abc%e" +# Valid: one floating-point argument +"abc%E" +# Valid: one floating-point argument +"abc%f" +# Valid: one floating-point argument +"abc%g" +# Valid: one floating-point argument +"abc%G" +# Valid: one argument with flags +"abc%0#g" +# Valid: one argument with width +"abc%2g" +# Valid: one argument with width +"abc%*g" +# Valid: one argument with precision +"abc%.4g" +# Valid: one argument with precision +"abc%.*g" +# Valid: one argument with width and precision +"abc%14.4g" +# Valid: one argument with width and precision +"abc%14.*g" +# Valid: one argument with width and precision +"abc%*.4g" +# Valid: one argument with width and precision +"abc%*.*g" +# Invalid: unterminated +"abc%" +# Invalid: unknown format specifier +"abc%y" +# Invalid: unknown format specifier +"abc%F" +# Invalid: flags after width +"abc%*0g" +# Invalid: twice precision +"abc%.4.2g" +# Valid: three arguments +"abc%d%u%u" +# Valid: a numbered argument +"abc%1$d" +# Invalid: zero +"abc%0$d" +# Valid: two-digit numbered arguments +"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz" +# Invalid: unterminated number +"abc%1" +# Invalid: flags before number +"abc%+1$d" +# Valid: three arguments, two with same number +"abc%1$4x,%2$c,%1$u" +# Invalid: argument with conflicting types +"abc%1$4x,%2$c,%1$s" +# Valid: no conflict +"abc%1$4x,%2$c,%1$u" +# Invalid: mixing of numbered and unnumbered arguments +"abc%d%2$x" +# Valid: numbered argument with constant precision +"abc%1$.9x" +# 
Invalid: mixing of numbered and unnumbered arguments +"abc%1$.*x" +# Valid: missing non-final argument +"abc%2$x%3$s" +# Valid: permutation +"abc%2$ddef%1$d" +# Valid: multiple uses of same argument +"abc%2$xdef%1$sghi%2$x" +# Valid: one argument with width +"abc%2$#*1$g" +# Valid: one argument with width and precision +"abc%3$*2$.*1$g" +# Invalid: zero +"abc%2$*0$.*1$g" +EOF + +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + tmpfiles="$tmpfiles f-a-1-$n.in f-a-1-$n.po" + cat < f-a-1-$n.in +dcgettext(${string}); +EOF + ${XGETTEXT} -L awk -o f-a-1-$n.po f-a-1-$n.in || exit 1 + test -f f-a-1-$n.po || exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep awk-format f-a-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep awk-format f-a-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-a-1-$n.in 1>&2 + echo "Got:" 1>&2 + cat f-a-1-$n.po 1>&2 + exit 1 + fi +done < f-a-1.data + +rm -fr $tmpfiles + +exit 0 diff --git a/tests/format-awk-2 b/tests/format-awk-2 new file mode 100755 index 000000000..3636cd060 --- /dev/null +++ b/tests/format-awk-2 @@ -0,0 +1,144 @@ +#! /bin/sh + +# Test checking of awk format strings. 
+ +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-a-2.data" +cat <<\EOF > f-a-2.data +# Valid: %% doesn't count +msgid "abc%%def" +msgstr "xyz" +# Invalid: invalid msgstr +msgid "abc%%def" +msgstr "xyz%" +# Valid: same arguments +msgid "abc%s%gdef" +msgstr "xyz%s%g" +# Valid: same arguments, with different widths +msgid "abc%2sdef" +msgstr "xyz%3s" +# Valid: same arguments but in numbered syntax +msgid "abc%s%gdef" +msgstr "xyz%1$s%2$g" +# Valid: permutation +msgid "abc%s%g%cdef" +msgstr "xyz%3$c%2$g%1$s" +# Invalid: too few arguments +msgid "abc%2$udef%1$s" +msgstr "xyz%1$s" +# Invalid: too few arguments +msgid "abc%sdef%u" +msgstr "xyz%s" +# Invalid: too many arguments +msgid "abc%udef" +msgstr "xyz%uvw%c" +# Valid: same numbered arguments, with different widths +msgid "abc%2$5s%1$4s" +msgstr "xyz%2$4s%1$5s" +# Invalid: missing argument +msgid "abc%2$sdef%1$u" +msgstr "xyz%1$u" +# Invalid: missing argument +msgid "abc%1$sdef%2$u" +msgstr "xyz%2$u" +# Invalid: added argument +msgid "abc%1$udef" +msgstr "xyz%1$uvw%2$c" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%d" +# Valid: type compatibility +msgid "abc%o" +msgstr "xyz%u" +# Valid: type compatibility +msgid "abc%u" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%u" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%E" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%f" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%o" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%o" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%e" +# Invalid: 
type incompatibility +msgid "abc%i" +msgstr "xyz%o" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%u" +msgstr "xyz%e" +# Invalid: type incompatibility for width +msgid "abc%g%*g" +msgstr "xyz%*g%g" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; do + read msgid_line + read msgstr_line + n=`expr $n + 1` + tmpfiles="$tmpfiles f-a-2-$n.po f-a-2-$n.mo" + cat < f-a-2-$n.po +#, awk-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-a-2-$n.mo f-a-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-a-2-$n.mo f-a-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-a-2-$n.po 1>&2 + exit 1 + fi +done < f-a-2.data + +rm -fr $tmpfiles + +exit 0 diff --git a/tests/lang-gawk b/tests/lang-gawk new file mode 100755 index 000000000..f83357b3c --- /dev/null +++ b/tests/lang-gawk @@ -0,0 +1,105 @@ +#! /bin/sh + +# Test of gettext facilities in the GNU awk language. +# Assumes an fr_FR locale is installed. +# Assumes the following packages are installed: gawk. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles prog.awk" +cat <<\EOF > prog.awk +BEGIN { + TEXTDOMAIN = "prog" + bindtextdomain ("./") + + print _"'Your command, please?', asked the waiter." + + printf dcngettext ("a piece of cake", "%d pieces of cake", n) "\n", n + + printf _"%s is replaced by %s." "\n", "FF", "EUR" +} +EOF + +tmpfiles="$tmpfiles prog.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} -o prog.pot --omit-header --no-location prog.awk + +tmpfiles="$tmpfiles prog.ok" +cat < prog.ok +msgid "'Your command, please?', asked the waiter." +msgstr "" + +#, awk-format +msgid "a piece of cake" +msgid_plural "%d pieces of cake" +msgstr[0] "" +msgstr[1] "" + +#, awk-format +msgid "%s is replaced by %s." 
+msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} prog.ok prog.pot || exit 1 + +tmpfiles="$tmpfiles fr.po" +cat <<\EOF > fr.po +msgid "" +msgstr "" +"Content-Type: text/plain; charset=ISO-8859-1\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +msgid "'Your command, please?', asked the waiter." +msgstr "«Votre commande, s'il vous plait», dit le garçon." + +# Les gateaux allemands sont les meilleurs du monde. +#, awk-format +msgid "a piece of cake" +msgid_plural "%d pieces of cake" +msgstr[0] "un morceau de gateau" +msgstr[1] "%d morceaux de gateau" + +# Reverse the arguments. +#, awk-format +msgid "%s is replaced by %s." +msgstr "%2$s remplace %1$s." +EOF + +tmpfiles="$tmpfiles fr.po.new" +: ${MSGMERGE=msgmerge} +${MSGMERGE} -q -o fr.po.new fr.po prog.pot + +: ${DIFF=diff} +${DIFF} fr.po fr.po.new || exit 1 + +tmpfiles="$tmpfiles fr" +test -d fr || mkdir fr +test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES + +: ${MSGFMT=msgfmt} +${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po + +tmpfiles="$tmpfiles prog.ok prog.out" +: ${DIFF=diff} +cat <<\EOF > prog.ok +«Votre commande, s'il vous plait», dit le garçon. +2 morceaux de gateau +EUR remplace FF. +EOF + +# Test for presence of gawk version 3.1.1 or newer. +(gawk --version) >/dev/null 2>/dev/null \ + || { echo "SKIP: lang-awk"; rm -fr $tmpfiles; exit 77; } +case `gawk --version | sed -e 's/^[^0-9]*//'` in + 0.* | 1.* | 2.* | 3.0* | 3.1.0*) + echo "SKIP: lang-awk"; rm -fr $tmpfiles; exit 77;; +esac + +LANGUAGE= LC_ALL=fr_FR gawk -v n=2 -f prog.awk > prog.out || exit 1 +${DIFF} prog.ok prog.out || exit 1 + +rm -fr $tmpfiles + +exit 0