+2002-01-27 Bruno Haible <bruno@clisp.org>
+
+ * gettext.texi (gawk): Update.
+
2002-02-02 Bruno Haible <bruno@clisp.org>
* gettext.texi (Python): Update.
@code{_"abc"}
@item gettext/ngettext functions
-@code{dcgettext}
+@code{dcgettext}, missing @code{dcngettext} in gawk-3.1.0
@item textdomain
@code{TEXTDOMAIN} variable
use
@item Extractor
-@code{gawk --gen-po}
+@code{xgettext}
@item Formatting with positions
@code{printf "%2$d %1$d"} (GNU awk only)
@item Portability
On platforms without gettext, no translation. On non-GNU awks, you must
-define @code{dcgettext} and @code{bindtextdomain} yourself.
+define @code{dcgettext}, @code{dcngettext} and @code{bindtextdomain}
+yourself.
@item po-mode marking
---
+2002-01-27 Bruno Haible <bruno@clisp.org>
+
+ * message.h (format_type): New enum value 'format_awk'.
+ (NFORMATS): Increment.
+ * message.c (format_language): Add format_awk entry.
+ (format_language_pretty): Likewise.
+ * format.h (formatstring_awk): New declaration.
+ * format-awk.c: New file.
+ * format.c (formatstring_parsers): Add formatstring_awk.
+ * x-awk.h: New file.
+ * x-awk.c: New file.
+ * xgettext.c: Include x-awk.h.
+ (main): Call x_awk_extract_all, x_awk_keyword.
+ (language_to_scanner): Add awk rule.
+ (extension_to_language): Add awk rule.
+ * Makefile.am (noinst_HEADERS): Add x-awk.h.
+ (FORMAT_SOURCE): Add format-awk.c.
+ (xgettext_SOURCES): Add x-awk.c.
+
2002-02-02 Bruno Haible <bruno@clisp.org>
* x-python.h: New file.
msgl-ascii.h msgl-cat.h msgl-english.h msgfmt.h msgunfmt.h read-mo.h \
write-mo.h read-java.h write-java.h po-time.h plural-table.h format.h \
xgettext.h x-c.h x-po.h x-python.h x-lisp.h x-elisp.h x-librep.h x-java.h \
-x-ycp.h x-rst.h
+x-awk.h x-ycp.h x-rst.h
EXTRA_DIST = FILES project-id \
gnu/gettext/DumpResource.java gnu/gettext/GetURL.java
# xgettext and msgfmt deal with format strings.
FORMAT_SOURCE = format.c \
-format-c.c format-java.c format-lisp.c format-elisp.c format-librep.c \
-format-python.c format-pascal.c format-ycp.c
+format-c.c format-python.c format-lisp.c format-elisp.c format-librep.c \
+format-java.c format-awk.c format-pascal.c format-ycp.c
# libgettextsrc contains all code that is needed by at least two programs.
libgettextsrc_la_SOURCES = \
msgmerge_SOURCES = msgmerge.c
msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c
xgettext_SOURCES = xgettext.c \
- x-c.c x-po.c x-python.c x-lisp.c x-elisp.c x-librep.c x-java.l x-ycp.c \
- x-rst.c
+ x-c.c x-po.c x-python.c x-lisp.c x-elisp.c x-librep.c x-java.l x-awk.c \
+ x-ycp.c x-rst.c
msgattrib_SOURCES = msgattrib.c
msgcat_SOURCES = msgcat.c
msgcomm_SOURCES = msgcomm.c
--- /dev/null
+/* awk format strings.
+ Copyright (C) 2001-2002 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2002.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "xmalloc.h"
+#include "error.h"
+#include "progname.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+/* awk format strings are described in the gawk-3.1 documentation and
+ implemented in gawk-3.1.0/builtin.c: format_tree().
+ A directive
+ - starts with '%' or '%m$' where m is a positive integer,
+ - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
+ each of which acts as a flag,
+ - is optionally followed by a width specification: '*' (reads an argument)
+ or '*m$' or a nonempty digit sequence,
+ - is optionally followed by '.' and a precision specification: '*' (reads
+ an argument) or '*m$' or a nonempty digit sequence,
+ - is finished by a specifier
+ - '%', that needs no argument,
+ - 'c', that needs a character argument,
+ - 's', that needs a string argument,
+ - 'i', 'd', that need a signed integer argument,
+ - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
+ - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
+ Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
+ be used in the same string.
+ */
+
+/* Kind of argument a directive consumes, as determined by its specifier
+   character in format_parse.  */
+enum format_arg_type
+{
+ FAT_NONE,
+ FAT_CHARACTER,
+ FAT_STRING,
+ FAT_INTEGER,
+ FAT_UNSIGNED_INTEGER,
+ FAT_FLOAT
+};
+
+/* One argument reference found in a format string: the (1-based) argument
+   number and the type the directive expects for it.  */
+struct numbered_arg
+{
+ unsigned int number;
+ enum format_arg_type type;
+};
+
+/* Result of parsing a format string.
+   directives          counts every '%' directive seen, including '%%'.
+   numbered_arg_count  is the number of valid entries in 'numbered'.
+   allocated           is the capacity of the 'numbered' array, in elements.
+   numbered            is a heap array (grown with xrealloc), or NULL when no
+                       argument has been recorded.  */
+struct spec
+{
+ unsigned int directives;
+ unsigned int numbered_arg_count;
+ unsigned int allocated;
+ struct numbered_arg *numbered;
+};
+
+/* Locale independent test for a decimal digit.
+ Argument can be 'char' or 'unsigned char'. (Whereas the argument of
+ <ctype.h> isdigit must be an 'unsigned char'.) */
+/* The difference is converted to 'unsigned int', so characters below '0'
+   wrap around to large values and a single comparison covers both bounds.  */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite K&R C function definition syntax. */
+static int numbered_arg_compare PARAMS ((const void *p1, const void *p2));
+static void *format_parse PARAMS ((const char *format));
+static void format_free PARAMS ((void *descr));
+static int format_get_number_of_directives PARAMS ((void *descr));
+static bool format_check PARAMS ((const lex_pos_ty *pos,
+ void *msgid_descr, void *msgstr_descr,
+ bool equality,
+ bool noisy, const char *pretty_msgstr));
+
+
+/* qsort comparison callback: orders two 'struct numbered_arg' elements by
+   ascending argument number.  Returns 1, -1 or 0.  */
+static int
+numbered_arg_compare (p1, p2)
+     const void *p1;
+     const void *p2;
+{
+  const struct numbered_arg *lhs = (const struct numbered_arg *) p1;
+  const struct numbered_arg *rhs = (const struct numbered_arg *) p2;
+
+  if (lhs->number > rhs->number)
+    return 1;
+  if (lhs->number < rhs->number)
+    return -1;
+  return 0;
+}
+
+/* Prototypes for the helpers of format_parse.  */
+static bool parse_argument_number PARAMS ((const char **formatp,
+                                           unsigned int *numberp));
+static bool add_argument PARAMS ((struct spec *spec,
+                                  unsigned int *unnumbered_arg_countp,
+                                  unsigned int number,
+                                  enum format_arg_type type));
+
+/* Try to parse an argument number of the form 'm$' at *FORMATP.
+   If a digit sequence followed by '$' is present, store m in *NUMBERP and
+   advance *FORMATP past the '$'.  Otherwise leave both untouched (a digit
+   sequence without '$' is not consumed here).
+   Return false if the syntax 'm$' is present but m is zero, true in all
+   other cases.  */
+static bool
+parse_argument_number (formatp, numberp)
+     const char **formatp;
+     unsigned int *numberp;
+{
+  const char *f = *formatp;
+  unsigned int m = 0;
+
+  if (!isdigit (*f))
+    return true;
+
+  do
+    {
+      m = 10 * m + (*f - '0');
+      f++;
+    }
+  while (isdigit (*f));
+
+  if (*f == '$')
+    {
+      if (m == 0)
+        return false;
+      *numberp = m;
+      *formatp = f + 1;
+    }
+  return true;
+}
+
+/* Record one argument of the given TYPE in SPEC->numbered.
+   A nonzero NUMBER denotes an explicitly numbered argument; NUMBER == 0
+   denotes the next unnumbered argument, which receives the number
+   *UNNUMBERED_ARG_COUNTP + 1.
+   Numbered arguments are counted in SPEC->numbered_arg_count, unnumbered
+   ones in *UNNUMBERED_ARG_COUNTP; both kinds share the same array.
+   Return false if numbered and unnumbered arguments would be mixed.  */
+static bool
+add_argument (spec, unnumbered_arg_countp, number, type)
+     struct spec *spec;
+     unsigned int *unnumbered_arg_countp;
+     unsigned int number;
+     enum format_arg_type type;
+{
+  bool is_numbered = (number != 0);
+  unsigned int slot;
+
+  /* Numbered and unnumbered specifications are exclusive.  */
+  if (is_numbered)
+    {
+      if (*unnumbered_arg_countp > 0)
+        return false;
+      slot = spec->numbered_arg_count;
+    }
+  else
+    {
+      if (spec->numbered_arg_count > 0)
+        return false;
+      slot = *unnumbered_arg_countp;
+      number = slot + 1;
+    }
+
+  if (spec->allocated == slot)
+    {
+      spec->allocated = 2 * spec->allocated + 1;
+      spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg));
+    }
+  spec->numbered[slot].number = number;
+  spec->numbered[slot].type = type;
+
+  if (is_numbered)
+    spec->numbered_arg_count++;
+  else
+    (*unnumbered_arg_countp)++;
+
+  return true;
+}
+
+/* Parse FORMAT as an awk format string.
+   Return a freshly allocated 'struct spec' describing its directives, with
+   the argument array sorted by argument number and free of duplicates, or
+   NULL if FORMAT is not a valid awk format string (unknown specifier,
+   argument number zero, mix of numbered and unnumbered arguments, or the
+   same argument used with two different types).
+   A non-NULL result must be released with format_free.  */
+static void *
+format_parse (format)
+     const char *format;
+{
+  struct spec spec;
+  unsigned int unnumbered_arg_count;
+  struct spec *result;
+
+  spec.directives = 0;
+  spec.numbered_arg_count = 0;
+  spec.allocated = 0;
+  spec.numbered = NULL;
+  unnumbered_arg_count = 0;
+
+  for (; *format != '\0';)
+    if (*format++ == '%')
+      {
+        /* A directive.  */
+        unsigned int number = 0;
+        enum format_arg_type type;
+
+        spec.directives++;
+
+        if (!parse_argument_number (&format, &number))
+          goto bad_format;
+
+        /* Parse flags.  */
+        while (*format == ' ' || *format == '+' || *format == '-'
+               || *format == '#' || *format == '0')
+          format++;
+
+        /* Parse width.  */
+        if (*format == '*')
+          {
+            unsigned int width_number = 0;
+
+            format++;
+
+            if (!parse_argument_number (&format, &width_number))
+              goto bad_format;
+            if (!add_argument (&spec, &unnumbered_arg_count, width_number,
+                               FAT_INTEGER))
+              goto bad_format;
+          }
+        else if (isdigit (*format))
+          {
+            do format++; while (isdigit (*format));
+          }
+
+        /* Parse precision.  */
+        if (*format == '.')
+          {
+            format++;
+
+            if (*format == '*')
+              {
+                unsigned int precision_number = 0;
+
+                format++;
+
+                if (!parse_argument_number (&format, &precision_number))
+                  goto bad_format;
+                /* An unnumbered precision gets the next argument number,
+                   exactly like an unnumbered width.  */
+                if (!add_argument (&spec, &unnumbered_arg_count,
+                                   precision_number, FAT_INTEGER))
+                  goto bad_format;
+              }
+            else if (isdigit (*format))
+              {
+                do format++; while (isdigit (*format));
+              }
+          }
+
+        switch (*format)
+          {
+          case '%':
+            type = FAT_NONE;
+            break;
+          case 'c':
+            type = FAT_CHARACTER;
+            break;
+          case 's':
+            type = FAT_STRING;
+            break;
+          case 'i': case 'd':
+            type = FAT_INTEGER;
+            break;
+          case 'u': case 'o': case 'x': case 'X':
+            type = FAT_UNSIGNED_INTEGER;
+            break;
+          case 'e': case 'E': case 'f': case 'g': case 'G':
+            type = FAT_FLOAT;
+            break;
+          default:
+            /* Also reached at the end of the string.  */
+            goto bad_format;
+          }
+
+        /* '%%' consumes no argument.  */
+        if (type != FAT_NONE
+            && !add_argument (&spec, &unnumbered_arg_count, number, type))
+          goto bad_format;
+
+        format++;
+      }
+
+  /* Convert the unnumbered argument array to numbered arguments.  */
+  if (unnumbered_arg_count > 0)
+    spec.numbered_arg_count = unnumbered_arg_count;
+  /* Sort the numbered argument array, and eliminate duplicates.  */
+  else if (spec.numbered_arg_count > 1)
+    {
+      unsigned int i, j;
+      bool err;
+
+      qsort (spec.numbered, spec.numbered_arg_count,
+             sizeof (struct numbered_arg), numbered_arg_compare);
+
+      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
+      err = false;
+      for (i = j = 0; i < spec.numbered_arg_count; i++)
+        if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
+          {
+            enum format_arg_type type1 = spec.numbered[i].type;
+            enum format_arg_type type2 = spec.numbered[j-1].type;
+            enum format_arg_type type_both;
+
+            if (type1 == type2)
+              type_both = type1;
+            else
+              /* Incompatible types.  */
+              type_both = FAT_NONE, err = true;
+
+            spec.numbered[j-1].type = type_both;
+          }
+        else
+          {
+            if (j < i)
+              {
+                spec.numbered[j].number = spec.numbered[i].number;
+                spec.numbered[j].type = spec.numbered[i].type;
+              }
+            j++;
+          }
+      spec.numbered_arg_count = j;
+      if (err)
+        goto bad_format;
+    }
+
+  result = (struct spec *) xmalloc (sizeof (struct spec));
+  *result = spec;
+  return result;
+
+ bad_format:
+  if (spec.numbered != NULL)
+    free (spec.numbered);
+  return NULL;
+}
+
+/* Release a descriptor returned by format_parse, together with its
+   argument array.  DESCR must not be NULL.  */
+static void
+format_free (descr)
+     void *descr;
+{
+  struct spec *parsed = (struct spec *) descr;
+  struct numbered_arg *args = parsed->numbered;
+
+  if (args != NULL)
+    free (args);
+  free (parsed);
+}
+
+/* Return the number of '%' directives counted while parsing the format
+   string that DESCR describes.  */
+static int
+format_get_number_of_directives (descr)
+     void *descr;
+{
+  return ((struct spec *) descr)->directives;
+}
+
+/* Verify that the format string described by MSGSTR_DESCR is compatible
+   with the one described by MSGID_DESCR.
+   POS            is the source position used in diagnostics.
+   EQUALITY       when true, every argument used in msgid must also be used
+                  in msgstr; when false, msgstr may use a subset.
+   NOISY          when true, mismatches are reported through error_at_line.
+   PRETTY_MSGSTR  is the name of the msgstr field used in the messages.
+   Return true if a mismatch was found, false if the strings are
+   compatible.  */
+static bool
+format_check (pos, msgid_descr, msgstr_descr, equality, noisy, pretty_msgstr)
+ const lex_pos_ty *pos;
+ void *msgid_descr;
+ void *msgstr_descr;
+ bool equality;
+ bool noisy;
+ const char *pretty_msgstr;
+{
+ struct spec *spec1 = (struct spec *) msgid_descr;
+ struct spec *spec2 = (struct spec *) msgstr_descr;
+ bool err = false;
+
+ if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+ {
+ unsigned int i, j;
+ unsigned int n1 = spec1->numbered_arg_count;
+ unsigned int n2 = spec2->numbered_arg_count;
+
+ /* Check the argument names are the same.
+ Both arrays are sorted. We search for the first difference. */
+ for (i = 0, j = 0; i < n1 || j < n2; )
+ {
+ /* cmp > 0: msgstr has an argument number that msgid lacks.
+ cmp < 0: msgid has an argument number that msgstr lacks. */
+ int cmp = (i >= n1 ? 1 :
+ j >= n2 ? -1 :
+ spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+ spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+ 0);
+
+ if (cmp > 0)
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
+ spec2->numbered[j].number, pretty_msgstr);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ else if (cmp < 0)
+ {
+ if (equality)
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("a format specification for argument %u doesn't exist in '%s'"),
+ spec1->numbered[i].number, pretty_msgstr);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ else
+ i++;
+ }
+ else
+ j++, i++;
+ }
+ /* Check the argument types are the same. */
+ /* The first pass succeeded, so every argument number of msgstr also
+ occurs in msgid; advancing i until the numbers match stays within
+ spec1->numbered. */
+ if (!err)
+ for (i = 0, j = 0; j < n2; )
+ {
+ if (spec1->numbered[i].number == spec2->numbered[j].number)
+ {
+ if (spec1->numbered[i].type != spec2->numbered[j].type)
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("format specifications in 'msgid' and '%s' for argument %u are not the same"),
+ pretty_msgstr,
+ spec2->numbered[j].number);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ j++, i++;
+ }
+ else
+ i++;
+ }
+ }
+
+ return err;
+}
+
+
+/* The awk format string parser: bundles the four static functions of this
+   file (parse, free, directive count, check) for use by other modules.  */
+struct formatstring_parser formatstring_awk =
+{
+ format_parse,
+ format_free,
+ format_get_number_of_directives,
+ format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+ format_parse for strings read from standard input. */
+
+#include <stdio.h>
+#include "getline.h"
+
+/* Print the argument list described by DESCR on standard output, as a
+   parenthesized, space separated list with one letter per argument and
+   "_" for every argument number that is skipped.  Prints "INVALID" when
+   DESCR is NULL.  Aborts if the arguments are not in strictly ascending
+   order or have an unexpected type.  */
+static void
+format_print (descr)
+     void *descr;
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int next_expected;
+  unsigned int idx;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  next_expected = 1;
+  for (idx = 0; idx < spec->numbered_arg_count; idx++)
+    {
+      unsigned int number = spec->numbered[idx].number;
+      enum format_arg_type type = spec->numbered[idx].type;
+
+      if (idx > 0)
+        printf (" ");
+      if (number < next_expected)
+        abort ();
+      /* Placeholders for the argument numbers that are not used.  */
+      while (next_expected < number)
+        {
+          printf ("_ ");
+          next_expected++;
+        }
+
+      if (type == FAT_CHARACTER)
+        printf ("c");
+      else if (type == FAT_STRING)
+        printf ("s");
+      else if (type == FAT_INTEGER)
+        printf ("i");
+      else if (type == FAT_UNSIGNED_INTEGER)
+        printf ("[unsigned]i");
+      else if (type == FAT_FLOAT)
+        printf ("f");
+      else
+        abort ();
+
+      next_expected = number + 1;
+    }
+  printf (")");
+}
+
+/* Test driver: read format strings from standard input, one per line, and
+   print the argument list specification of each one, until end of input
+   or a read error.  */
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_len = 0;
+      void *descr;
+
+      if (getline (&line, &line_len, stdin) < 0)
+        {
+          /* getline may have allocated a buffer even though it failed.  */
+          if (line != NULL)
+            free (line);
+          break;
+        }
+
+      descr = format_parse (line);
+
+      format_print (descr);
+      printf ("\n");
+
+      /* format_parse returns NULL for invalid strings; format_free must
+         only be given a real descriptor.  */
+      if (descr != NULL)
+        format_free (descr);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
/* format_librep */ &formatstring_librep,
/* format_smalltalk */ &formatstring_smalltalk,
/* format_java */ &formatstring_java,
+ /* format_awk */ &formatstring_awk,
/* format_pascal */ &formatstring_pascal,
/* format_ycp */ &formatstring_ycp
};
extern struct formatstring_parser formatstring_librep;
extern struct formatstring_parser formatstring_smalltalk;
extern struct formatstring_parser formatstring_java;
+extern struct formatstring_parser formatstring_awk;
extern struct formatstring_parser formatstring_pascal;
extern struct formatstring_parser formatstring_ycp;
/* format_librep */ "librep",
/* format_smalltalk */ "smalltalk",
/* format_java */ "java",
+ /* format_awk */ "awk",
/* format_pascal */ "object-pascal",
/* format_ycp */ "ycp"
};
/* format_librep */ "librep",
/* format_smalltalk */ "Smalltalk",
/* format_java */ "Java",
+ /* format_awk */ "awk",
/* format_pascal */ "Object Pascal",
/* format_ycp */ "YCP"
};
format_librep,
format_smalltalk,
format_java,
+ format_awk,
format_pascal,
format_ycp
};
-#define NFORMATS 9 /* Number of format_type enum values. */
+#define NFORMATS 10 /* Number of format_type enum values. */
extern const char *const format_language[NFORMATS];
extern const char *const format_language_pretty[NFORMATS];
--- /dev/null
+/* xgettext awk backend.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+ This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-awk.h"
+#include "xgettext.h"
+#include "error.h"
+#include "progname.h"
+#include "xmalloc.h"
+#include "exit.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif
+
+
+/* The awk syntax is defined in the gawk manual page and documentation.
+ See also gawk/awkgram.y. */
+
+/* Classification of the tokens produced by x_awk_lex.  */
+enum token_type_ty
+{
+ token_type_eof,
+ token_type_lparen, /* ( */
+ token_type_rparen, /* ) */
+ token_type_comma, /* , */
+ token_type_string, /* "abc" */
+ token_type_i18nstring, /* _"abc" */
+ token_type_symbol, /* symbol, number */
+ token_type_other /* regexp, misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
+
+/* A token as filled in by x_awk_lex.  'string' is heap-allocated for the
+   three token types named below and is released by free_token;
+   'line_number' is the input line number recorded when scanning of the
+   token began.  */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+ token_type_ty type;
+ char *string; /* for token_type_{symbol,string,i18nstring} */
+ int line_number;
+};
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite K&R C function definition syntax. */
+static void init_keywords PARAMS ((void));
+static int phase1_getc PARAMS ((void));
+static void phase1_ungetc PARAMS ((int c));
+static int phase2_getc PARAMS ((void));
+static void phase2_ungetc PARAMS ((int c));
+static int phase7_getc PARAMS ((void));
+static inline void free_token PARAMS ((token_ty *tp));
+static void x_awk_lex PARAMS ((token_ty *tp));
+static bool extract_parenthesized PARAMS ((message_list_ty *mlp,
+ int commas_to_skip,
+ int plural_commas));
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+/* Keyword table filled by x_awk_keyword: maps a keyword name to the value
+   argnum1 + (argnum2 << 10), stored as a 'void *'.  */
+static hash_table keywords;
+/* True as long as the built-in default keywords still have to be installed
+   by init_keywords; cleared by x_awk_keyword (NULL).  */
+static bool default_keywords = true;
+
+
+/* Request extraction of all strings by setting the 'extract_all' flag.  */
+void
+x_awk_extract_all ()
+{
+ extract_all = true;
+}
+
+
+/* Register the keyword specification NAME in the keyword table.
+   A NULL NAME registers nothing and disables the default keywords
+   instead.  A specification that split_keywordspec could not parse is
+   silently ignored.  */
+void
+x_awk_keyword (name)
+     const char *name;
+{
+  const char *end;
+  int argnum1;
+  int argnum2;
+  const char *colon;
+
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* Create the table lazily, on the first keyword.  */
+  if (keywords.table == NULL)
+    init_hash (&keywords, 100);
+
+  split_keywordspec (name, &end, &argnum1, &argnum2);
+
+  /* The keyword proper extends from name to end.  A colon inside that
+     range means that split_keywordspec() could not parse the
+     specification.  */
+  colon = strchr (name, ':');
+  if (colon != NULL && colon < end)
+    return;
+
+  if (argnum1 == 0)
+    argnum1 = 1;
+  insert_entry (&keywords, name, end - name,
+                (void *) (long) (argnum1 + (argnum2 << 10)));
+}
+
+/* Complete the keyword table: unless the defaults were disabled or have
+   already been installed, register the built-in awk keywords.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (!default_keywords)
+    return;
+
+  x_awk_keyword ("dcgettext");
+  x_awk_keyword ("dcngettext:1,2");
+  /* Do this only once.  */
+  default_keywords = false;
+}
+
+
+/* ================== Reading of characters and tokens. =================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* 1. line_number handling. */
+
+/* Read the next character from the input stream and keep line_number up
+   to date.  Returns EOF at end of input; a read error is fatal.  */
+static int
+phase1_getc ()
+{
+  int ch = getc (fp);
+
+  if (ch == '\n')
+    line_number++;
+  else if (ch == EOF && ferror (fp))
+    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+           real_file_name);
+
+  return ch;
+}
+
+/* Push the character C back onto the input stream, undoing the
+   line_number increment of phase1_getc.  EOF is ignored.  */
+static void
+phase1_ungetc (c)
+     int c;
+{
+  if (c == EOF)
+    return;
+
+  if (c == '\n')
+    --line_number;
+  ungetc (c, fp);
+}
+
+
+/* 2. Replace each comment that is not inside a string literal or regular
+ expression with a newline character. We need to remember the comment
+ for later, because it may be attached to a keyword string. */
+
+/* Return the next input character, with a '#' comment collapsed into the
+   character that terminates it (newline or EOF).  The text of the comment
+   is passed to xgettext_comment_add and its line is remembered in
+   last_comment_line.  */
+static int
+phase2_getc ()
+{
+  static char *buffer;
+  static size_t bufmax;
+  size_t used;
+  int comment_line;
+  int c;
+
+  c = phase1_getc ();
+  if (c != '#')
+    return c;
+
+  /* Collect the comment text up to, but not including, the end of line.  */
+  used = 0;
+  comment_line = line_number;
+  while ((c = phase1_getc ()) != '\n' && c != EOF)
+    {
+      if (used >= bufmax)
+        {
+          bufmax += 100;
+          buffer = xrealloc (buffer, bufmax);
+        }
+      buffer[used++] = c;
+    }
+
+  /* Make room for the terminating NUL.  */
+  if (used >= bufmax)
+    {
+      bufmax += 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[used] = '\0';
+
+  xgettext_comment_add (buffer);
+  last_comment_line = comment_line;
+  return c;
+}
+
+/* Push C back for the next phase2_getc call.  EOF is ignored.  */
+static void
+phase2_ungetc (c)
+     int c;
+{
+  if (c == EOF)
+    return;
+
+  phase1_ungetc (c);
+}
+
+
+/* 7. Replace escape sequences within character strings with their
+ single character equivalents. */
+
+#define P7_QUOTES (1000 + '"')
+
+/* Return the next character of a string literal, with escape sequences
+   replaced by the character they denote.
+   Returns P7_QUOTES at the closing, unescaped '"'.  An escaped quote is
+   returned as a plain '"', which is why the terminator needs a value
+   outside the character range.
+   If a newline or EOF is reached first, that character is pushed back, a
+   warning about the unterminated string is issued, and P7_QUOTES is
+   returned as well.  */
+static int
+phase7_getc ()
+{
+ int c;
+
+ for (;;)
+ {
+ /* Use phase 1, because phase 2 elides comments. */
+ c = phase1_getc ();
+
+ if (c == EOF || c == '\n')
+ break;
+ if (c == '"')
+ return P7_QUOTES;
+ if (c != '\\')
+ return c;
+ c = phase1_getc ();
+ if (c == EOF)
+ break;
+ /* Backslash-newline yields no character: loop for the next one. */
+ if (c != '\n')
+ switch (c)
+ {
+ case 'a':
+ return ALERT_CHAR;
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ /* Octal escape: one to three octal digits. */
+ {
+ int n = c - '0';
+
+ c = phase1_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ {
+ n = (n << 3) + (c - '0');
+ c = phase1_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ n = (n << 3) + (c - '0');
+ else
+ phase1_ungetc (c);
+ }
+ }
+ else
+ phase1_ungetc (c);
+ }
+ return (unsigned char) n;
+ }
+ case 'x':
+ /* Hexadecimal escape: consumes all hexadecimal digits that follow;
+ the value is truncated to 'unsigned char'. */
+ {
+ int n = 0;
+
+ for (;;)
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ break;
+ else if (c >= '0' && c <= '9')
+ n = (n << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'F')
+ n = (n << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'f')
+ n = (n << 4) + (c - 'a' + 10);
+ else
+ {
+ phase1_ungetc (c);
+ break;
+ }
+ }
+ return (unsigned char) n;
+ }
+ default:
+ /* Any other escaped character stands for itself. */
+ return c;
+ }
+ }
+
+ /* Unterminated string: leave the newline (if any) for the caller. */
+ phase1_ungetc (c);
+ error_with_progname = false;
+ error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
+ line_number);
+ error_with_progname = true;
+ return P7_QUOTES;
+}
+
+
+/* Release the string owned by the token *TP, if its type carries one.
+   The 'struct token_ty' itself is not freed.  */
+static inline void
+free_token (tp)
+     token_ty *tp;
+{
+  if (tp->type == token_type_string
+      || tp->type == token_type_i18nstring
+      || tp->type == token_type_symbol)
+    free (tp->string);
+}
+
+
+/* Combine characters into tokens. Discard whitespace. */
+
+/* There is an ambiguity about '/': It can start a division operator ('/' or
+ '/=') or it can start a regular expression. The distinction is important
+ because inside regular expressions, '#' and '"' lose their special meanings.
+ If you look at the awk grammar, you see that the operator is only allowed
+ right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
+ can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
+ So we prefer the division operator interpretation only right after
+ symbol, string, number, ')', ']', with whitespace but no newline allowed
+ in between. */
+static bool prefer_division_over_regexp;
+
+/* Read the next token from the input and store it in *TP.
+   Whitespace, newlines, comments and stray backslashes are skipped.
+   For token_type_symbol, token_type_string and token_type_i18nstring,
+   TP->string is a copy made with xstrdup.
+   Updates prefer_division_over_regexp according to the token returned, so
+   that a following '/' is interpreted correctly.  */
+static void
+x_awk_lex (tp)
+ token_ty *tp;
+{
+ static char *buffer;
+ static int bufmax;
+ int bufpos;
+ int c;
+
+ for (;;)
+ {
+ tp->line_number = line_number;
+ c = phase2_getc ();
+
+ switch (c)
+ {
+ case EOF:
+ tp->type = token_type_eof;
+ return;
+
+ case '\n':
+ if (last_non_comment_line > last_comment_line)
+ xgettext_comment_reset ();
+ /* Newline is not allowed inside expressions. It usually
+ introduces a fresh statement. */
+ prefer_division_over_regexp = false;
+ /* FALLTHROUGH */
+ case '\t':
+ case ' ':
+ /* Ignore whitespace and comments. */
+ continue;
+
+ case '\\':
+ /* Backslash ought to be immediately followed by a newline. */
+ continue;
+ }
+
+ last_non_comment_line = tp->line_number;
+
+ switch (c)
+ {
+ case '.':
+ {
+ /* A '.' followed by a digit starts a number; otherwise it is an
+ operator. */
+ int c2 = phase2_getc ();
+ phase2_ungetc (c2);
+ if (!(c2 >= '0' && c2 <= '9'))
+ {
+
+ tp->type = token_type_other;
+ prefer_division_over_regexp = false;
+ return;
+ }
+ }
+ /* FALLTHROUGH */
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '_':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* Symbol, or part of a number. */
+ bufpos = 0;
+ for (;;)
+ {
+ if (bufpos >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[bufpos++] = c;
+ c = phase2_getc ();
+ switch (c)
+ {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '_':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ continue;
+ default:
+ /* A lone '_' immediately followed by '"' marks a translatable
+ string; the '"' has been consumed already. */
+ if (bufpos == 1 && buffer[0] == '_' && c == '"')
+ {
+ tp->type = token_type_i18nstring;
+ goto case_string;
+ }
+ phase2_ungetc (c);
+ break;
+ }
+ break;
+ }
+ if (bufpos >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[bufpos] = '\0';
+ tp->string = xstrdup (buffer);
+ tp->type = token_type_symbol;
+ /* Most identifiers can be variable names; after them we must
+ interpret '/' as division operator. But for awk's builtin
+ keywords we have three cases:
+ (a) Must interpret '/' as division operator. "length".
+ (b) Must interpret '/' as start of a regular expression.
+ "do", "exit", "print", "printf", "return".
+ (c) '/' after this keyword is invalid anyway. All others.
+ I used the following script for the distinction.
+ for k in $awk_keywords; do
+ echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
+ done
+ */
+ if (strcmp (buffer, "do") == 0
+ || strcmp (buffer, "exit") == 0
+ || strcmp (buffer, "print") == 0
+ || strcmp (buffer, "printf") == 0
+ || strcmp (buffer, "return") == 0)
+ prefer_division_over_regexp = false;
+ else
+ prefer_division_over_regexp = true;
+ return;
+
+ case '"':
+ tp->type = token_type_string;
+ case_string:
+ /* Reached with tp->type already set, either from the case above or
+ from the '_"' detection in the symbol loop. */
+ bufpos = 0;
+ for (;;)
+ {
+ c = phase7_getc ();
+ if (c == EOF || c == P7_QUOTES)
+ break;
+ if (bufpos >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[bufpos++] = c;
+ }
+ if (bufpos >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[bufpos] = '\0';
+ tp->string = xstrdup (buffer);
+ prefer_division_over_regexp = true;
+ return;
+
+ case '(':
+ tp->type = token_type_lparen;
+ prefer_division_over_regexp = false;
+ return;
+
+ case ')':
+ tp->type = token_type_rparen;
+ prefer_division_over_regexp = true;
+ return;
+
+ case ',':
+ tp->type = token_type_comma;
+ prefer_division_over_regexp = false;
+ return;
+
+ case ']':
+ tp->type = token_type_other;
+ prefer_division_over_regexp = true;
+ return;
+
+ case '/':
+ if (!prefer_division_over_regexp)
+ {
+ /* Regular expression.
+ Counting brackets is non-trivial. [[] is balanced, and so is
+ [\]]. Also, /[/]/ is balanced and ends at the third slash.
+ Do not count [ or ] if either one is preceded by a \.
+ A '[' should be counted if
+ a) it is the first one so far (brackets == 0), or
+ b) it is the '[' in '[:'.
+ A ']' should be counted if not preceded by a \.
+ According to POSIX, []] is how you put a ] into a set.
+ Try to handle that too.
+ */
+ int brackets = 0;
+ bool pos0 = true; /* true at start of regexp */
+ bool pos1_open = false; /* true after [ at start of regexp */
+ bool pos2_open_not = false; /* true after [^ at start of regexp */
+
+ for (;;)
+ {
+ /* Use phase 1: '#' does not start a comment inside a regexp. */
+ c = phase1_getc ();
+
+ if (c == EOF || c == '\n')
+ {
+ phase1_ungetc (c);
+ error_with_progname = false;
+ error (0, 0, _("%s:%d: warning: unterminated regular expression"),
+ logical_file_name, line_number);
+ error_with_progname = true;
+ break;
+ }
+ else if (c == '[')
+ {
+ if (brackets == 0)
+ brackets++;
+ else
+ {
+ c = phase1_getc ();
+ if (c == ':')
+ brackets++;
+ phase1_ungetc (c);
+ }
+ if (pos0)
+ {
+ pos0 = false;
+ pos1_open = true;
+ continue;
+ }
+ }
+ else if (c == ']')
+ {
+ if (!(pos1_open || pos2_open_not))
+ brackets--;
+ }
+ else if (c == '^')
+ {
+ if (pos1_open)
+ {
+ pos1_open = false;
+ pos2_open_not = true;
+ continue;
+ }
+ }
+ else if (c == '\\')
+ {
+ c = phase1_getc ();
+ /* Backslash-newline is valid and ignored. */
+ }
+ else if (c == '/')
+ {
+ if (brackets <= 0)
+ break;
+ }
+
+ pos0 = false;
+ pos1_open = false;
+ pos2_open_not = false;
+ }
+
+ tp->type = token_type_other;
+ prefer_division_over_regexp = false;
+ return;
+ }
+ /* FALLTHROUGH */
+
+ default:
+ /* We could carefully recognize each of the 2 and 3 character
+ operators, but it is not necessary, as we only need to recognize
+ gettext invocations. Don't bother. */
+ tp->type = token_type_other;
+ prefer_division_over_regexp = false;
+ return;
+ }
+ }
+}
+
+
+/* ========================= Extracting strings. ========================== */
+
+/* The file is broken into tokens. Scan the token stream, looking for
+ a keyword, followed by a left paren, followed by a string. When we
+ see this sequence, we have something to remember. We assume we are
+ looking at a valid awk program, and leave the complaints about
+ the grammar to the awk interpreter.
+
+ Normal handling: Look for
+ keyword ( ... msgid ... )
+ Plural handling: Look for
+ keyword ( ... msgid ... msgid_plural ... )
+
+ We use recursion because the arguments before msgid or between msgid
+ and msgid_plural can contain subexpressions of the same form. */
+
+
+/* Scan tokens up to the closing parenthesis that balances the one the
+   caller has just consumed, adding every message found to MLP.
+   COMMAS_TO_SKIP >= 0 selects the argument that holds the msgid, given
+   as the number of commas to pass before reaching it; COMMAS_TO_SKIP < 0
+   means that no particular argument is wanted.
+   PLURAL_COMMAS > 0 additionally selects a msgid_plural argument that
+   many commas after the msgid; PLURAL_COMMAS = 0 means there is none.
+   Returns true when end of file was reached, false when the closing
+   parenthesis was found.  */
+static bool
+extract_parenthesized (mlp, commas_to_skip, plural_commas)
+     message_list_ty *mlp;
+     int commas_to_skip;
+     int plural_commas;
+{
+  /* The message whose msgid has been stored and which still waits for
+     its msgid_plural; NULL when there is none.  */
+  message_ty *pending_plural = NULL;
+  /* True exactly when the previous token was a keyword symbol.  */
+  bool after_keyword = false;
+  /* Argument selection of that keyword, handed to the recursive call.
+     Only meaningful while after_keyword is true.  */
+  int keyword_commas_to_skip = -1;
+  int keyword_plural_commas = 0;
+
+  for (;;)
+    {
+      token_ty token;
+
+      x_awk_lex (&token);
+
+      switch (token.type)
+        {
+        case token_type_eof:
+          return true;
+
+        case token_type_other:
+          after_keyword = false;
+          break;
+
+        case token_type_symbol:
+          /* Keywords are irrelevant when all strings are extracted.  */
+          if (!extract_all)
+            {
+              void *value;
+
+              after_keyword =
+                (find_entry (&keywords, token.string, strlen (token.string),
+                             &value)
+                 == 0);
+              if (after_keyword)
+                {
+                  /* The table value packs the msgid argument number into
+                     the low 10 bits and the msgid_plural argument number
+                     into the bits above them.  */
+                  int packed = (int) (long) value;
+                  int msgid_arg = packed & ((1 << 10) - 1);
+                  int plural_arg = packed >> 10;
+
+                  keyword_commas_to_skip = msgid_arg - 1;
+                  if (plural_arg > msgid_arg)
+                    keyword_plural_commas = plural_arg - msgid_arg;
+                  else
+                    keyword_plural_commas = 0;
+                }
+            }
+          free (token.string);
+          break;
+
+        case token_type_lparen:
+          /* Nesting is irrelevant when all strings are extracted.  */
+          if (!extract_all)
+            {
+              bool eof_reached;
+
+              /* Only a parenthesis directly after a keyword selects
+                 specific arguments in the nested list.  */
+              if (after_keyword)
+                eof_reached =
+                  extract_parenthesized (mlp, keyword_commas_to_skip,
+                                         keyword_plural_commas);
+              else
+                eof_reached = extract_parenthesized (mlp, -1, 0);
+              if (eof_reached)
+                return true;
+              after_keyword = false;
+            }
+          break;
+
+        case token_type_rparen:
+          /* Nesting is irrelevant when all strings are extracted.  */
+          if (!extract_all)
+            return false;
+          break;
+
+        case token_type_comma:
+          /* Argument positions are irrelevant when all strings are
+             extracted.  */
+          if (!extract_all)
+            {
+              if (commas_to_skip > 0)
+                commas_to_skip--;
+              else if (commas_to_skip == 0)
+                {
+                  /* The msgid argument ends here.  Start counting towards
+                     the msgid_plural argument if a msgid was stored and a
+                     plural is wanted; otherwise nothing more is wanted
+                     from this argument list.  */
+                  if (pending_plural != NULL && plural_commas > 0)
+                    {
+                      commas_to_skip = plural_commas - 1;
+                      plural_commas = 0;
+                    }
+                  else
+                    commas_to_skip = -1;
+                }
+              after_keyword = false;
+            }
+          break;
+
+        case token_type_string:
+          {
+            lex_pos_ty pos;
+
+            pos.file_name = logical_file_name;
+            pos.line_number = token.line_number;
+
+            if (extract_all)
+              {
+                remember_a_message (mlp, token.string, &pos);
+                break;
+              }
+
+            if (commas_to_skip != 0)
+              /* Not in a wanted argument position.  */
+              free (token.string);
+            else if (pending_plural != NULL)
+              {
+                /* This string is the msgid_plural.  */
+                remember_a_message_plural (pending_plural, token.string,
+                                           &pos);
+                pending_plural = NULL;
+              }
+            else
+              {
+                /* This string is the msgid.  */
+                message_ty *mp =
+                  remember_a_message (mlp, token.string, &pos);
+
+                if (plural_commas > 0)
+                  pending_plural = mp;
+              }
+            after_keyword = false;
+            break;
+          }
+
+        case token_type_i18nstring:
+          {
+            lex_pos_ty pos;
+
+            pos.file_name = logical_file_name;
+            pos.line_number = token.line_number;
+
+            /* Tokens of this type are remembered regardless of keyword
+               and argument position.  */
+            remember_a_message (mlp, token.string, &pos);
+            after_keyword = false;
+            break;
+          }
+
+        default:
+          abort ();
+        }
+    }
+}
+
+
+/* Scan the awk source read from F and add its translatable strings to
+   the first message list of MDLP.  REAL_FILENAME and LOGICAL_FILENAME
+   are stored in the file-scope variables of this scanner for the
+   duration of the scan.  */
+void
+extract_awk (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  bool eof_reached;
+
+  /* Initialize the file-scope scanner state for this input.  */
+  fp = f;
+  real_file_name = real_filename;
+  /* NOTE(review): this copy is not freed below; presumably the positions
+     of the remembered messages keep pointing to it -- confirm.  */
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  prefer_division_over_regexp = false;
+
+  init_keywords ();
+
+  /* Consume tokens until end of file.  A return value of false only
+     means that an unbalanced closing parenthesis was met; scanning is
+     then simply resumed.  */
+  do
+    eof_reached = extract_parenthesized (mlp, -1, 0);
+  while (!eof_reached);
+
+  /* Detach the scanner state from this input.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
--- /dev/null
+/* xgettext awk backend.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2002.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+/* Entry for the table of file extensions in xgettext.c, where the
+   EXTENSIONS_* macros of all languages are expanded side by side.
+   Presumably the first string is the file extension and the second the
+   language name -- confirm against the table's element type.  */
+#define EXTENSIONS_AWK \
+ { "awk", "awk" }, \
+
+/* Entry for the table of scanners in xgettext.c, where the SCANNERS_*
+   macros of all languages are expanded side by side.  It names the
+   language, its extractor function and the awk format string parser.  */
+#define SCANNERS_AWK \
+ { "awk", extract_awk, &formatstring_awk }, \
+
+/* Scan an awk file and add its translatable strings to mdlp. */
+extern void extract_awk PARAMS ((FILE *fp, const char *real_filename,
+ const char *logical_filename,
+ msgdomain_list_ty *mdlp));
+
+/* Called by xgettext.c with the argument of its keyword option, next to
+   the corresponding functions of the other languages.  Presumably it
+   registers KEYWORD as a function name whose string arguments are to be
+   extracted -- the definition is not visible here, confirm.  */
+extern void x_awk_keyword PARAMS ((const char *keyword));
+/* Called by xgettext.c next to the corresponding functions of the other
+   languages.  Presumably it makes extract_awk remember every string
+   instead of only keyword arguments -- confirm against the definition.  */
+extern void x_awk_extract_all PARAMS ((void));
#include "x-elisp.h"
#include "x-librep.h"
#include "x-java.h"
+#include "x-awk.h"
#include "x-ycp.h"
#include "x-rst.h"
x_elisp_extract_all ();
x_librep_extract_all ();
x_java_extract_all ();
+ x_awk_extract_all ();
break;
case 'c':
if (optarg == NULL)
x_elisp_keyword (optarg);
x_librep_keyword (optarg);
x_java_keyword (optarg);
+ x_awk_keyword (optarg);
}
break;
case 'K':
SCANNERS_ELISP
SCANNERS_LIBREP
SCANNERS_JAVA
+ SCANNERS_AWK
SCANNERS_YCP
SCANNERS_RST
- /* Here will follow more languages and their scanners: awk, perl,
- etc... Make sure new scanners honor the --exclude-file option. */
+ /* Here will follow more languages and their scanners: perl, etc...
+ Make sure new scanners honor the --exclude-file option. */
};
table_ty *tp;
EXTENSIONS_ELISP
EXTENSIONS_LIBREP
EXTENSIONS_JAVA
+ EXTENSIONS_AWK
EXTENSIONS_YCP
EXTENSIONS_RST
/* Here will follow more file extensions: sh, pl, tcl ... */
+2002-01-27 Bruno Haible <bruno@clisp.org>
+
+ * format-awk-1: New file.
+ * format-awk-2: New file.
+ * lang-gawk: New file.
+ * Makefile.am (TESTS): Add format-awk-1, format-awk-2, lang-gawk.
+
2002-02-02 Bruno Haible <bruno@clisp.org>
* xgettext-18: New file.
xgettext-7 xgettext-8 xgettext-9 xgettext-10 xgettext-11 xgettext-12 \
xgettext-13 xgettext-14 xgettext-15 xgettext-16 xgettext-17 \
xgettext-18 \
+ format-awk-1 format-awk-2 \
format-c-1 format-c-2 \
format-elisp-1 format-elisp-2 \
format-java-1 format-java-2 \
format-pascal-1 format-pascal-2 \
format-ycp-1 format-ycp-2 \
plural-1 plural-2 \
- lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-pascal lang-ycp lang-po lang-rst \
+ lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-gawk lang-pascal lang-ycp lang-po lang-rst \
rpath-1a rpath-1b \
rpath-2aaa rpath-2aab rpath-2aac rpath-2aad \
rpath-2aba rpath-2abb rpath-2abc rpath-2abd \
--- /dev/null
+#! /bin/sh
+
+# Test recognition of awk format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-a-1.data"
+cat <<\EOF > f-a-1.data
+# Valid: no argument
+"abc%%"
+# Valid: one character argument
+"abc%c"
+# Valid: one string argument
+"abc%s"
+# Valid: one integer argument
+"abc%i"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%o"
+# Valid: one integer argument
+"abc%u"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one floating-point argument
+"abc%e"
+# Valid: one floating-point argument
+"abc%E"
+# Valid: one floating-point argument
+"abc%f"
+# Valid: one floating-point argument
+"abc%g"
+# Valid: one floating-point argument
+"abc%G"
+# Valid: one argument with flags
+"abc%0#g"
+# Valid: one argument with width
+"abc%2g"
+# Valid: one argument with width
+"abc%*g"
+# Valid: one argument with precision
+"abc%.4g"
+# Valid: one argument with precision
+"abc%.*g"
+# Valid: one argument with width and precision
+"abc%14.4g"
+# Valid: one argument with width and precision
+"abc%14.*g"
+# Valid: one argument with width and precision
+"abc%*.4g"
+# Valid: one argument with width and precision
+"abc%*.*g"
+# Invalid: unterminated
+"abc%"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: unknown format specifier
+"abc%F"
+# Invalid: flags after width
+"abc%*0g"
+# Invalid: twice precision
+"abc%.4.2g"
+# Valid: three arguments
+"abc%d%u%u"
+# Valid: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%+1$d"
+# Valid: three arguments, two with same number
+"abc%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+"abc%1$4x,%2$c,%1$s"
+# Valid: no conflict
+"abc%1$4x,%2$c,%1$u"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid: numbered argument with constant precision
+"abc%1$.9x"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%1$.*x"
+# Valid: missing non-final argument
+"abc%2$x%3$s"
+# Valid: permutation
+"abc%2$ddef%1$d"
+# Valid: multiple uses of same argument
+"abc%2$xdef%1$sghi%2$x"
+# Valid: one argument with width
+"abc%2$#*1$g"
+# Valid: one argument with width and precision
+"abc%3$*2$.*1$g"
+# Invalid: zero
+"abc%2$*0$.*1$g"
+EOF
+
+# Run xgettext on every test case of f-a-1.data.  A test case consists of
+# two lines: a comment line that contains "Valid:" or "Invalid:", and the
+# awk string literal to be examined.
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  infile=f-a-1-$n.in
+  pofile=f-a-1-$n.po
+  tmpfiles="$tmpfiles $infile $pofile"
+  # Wrap the string literal into a dcgettext call, so that it is extracted.
+  cat <<EOF > $infile
+dcgettext(${string});
+EOF
+  ${XGETTEXT} -L awk -o $pofile $infile || exit 1
+  test -f $pofile || exit 1
+  # The awk-format flag is expected exactly for the valid format strings.
+  case "$comment" in
+    *Valid:*) expected=yes ;;
+    *) expected=no ;;
+  esac
+  if grep awk-format $pofile > /dev/null; then
+    found=yes
+  else
+    found=no
+  fi
+  if test "$expected" != "$found"; then
+    echo "Format string recognition error:" 1>&2
+    cat $infile 1>&2
+    echo "Got:" 1>&2
+    cat $pofile 1>&2
+    exit 1
+  fi
+done < f-a-1.data
+
+rm -fr $tmpfiles
+
+exit 0
--- /dev/null
+#! /bin/sh
+
+# Test checking of awk format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-a-2.data"
+cat <<\EOF > f-a-2.data
+# Valid: %% doesn't count
+msgid "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid "abc%s%gdef"
+msgstr "xyz%s%g"
+# Valid: same arguments, with different widths
+msgid "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid "abc%s%gdef"
+msgstr "xyz%1$s%2$g"
+# Valid: permutation
+msgid "abc%s%g%cdef"
+msgstr "xyz%3$c%2$g%1$s"
+# Invalid: too few arguments
+msgid "abc%2$udef%1$s"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same numbered arguments, with different widths
+msgid "abc%2$5s%1$4s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing argument
+msgid "abc%2$sdef%1$u"
+msgstr "xyz%1$u"
+# Invalid: missing argument
+msgid "abc%1$sdef%2$u"
+msgstr "xyz%2$u"
+# Invalid: added argument
+msgid "abc%1$udef"
+msgstr "xyz%1$uvw%2$c"
+# Valid: type compatibility
+msgid "abc%i"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid "abc%o"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid "abc%u"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid "abc%u"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%f"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%G"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%i"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%o"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%i"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%o"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid "abc%i"
+msgstr "xyz%o"
+# Invalid: type incompatibility
+msgid "abc%i"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid "abc%u"
+msgstr "xyz%e"
+# Invalid: type incompatibility for width
+msgid "abc%g%*g"
+msgstr "xyz%*g%g"
+EOF
+
+# Run msgfmt --check-format on every test case of f-a-2.data.  A test case
+# consists of three lines: a comment line that contains "Valid:" or
+# "Invalid:", a msgid line and a msgstr line.
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  pofile=f-a-2-$n.po
+  mofile=f-a-2-$n.mo
+  tmpfiles="$tmpfiles $pofile $mofile"
+  cat <<EOF > $pofile
+#, awk-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  case "$comment" in
+    *Valid:*)
+      # A valid pair must be accepted.
+      ${MSGFMT} --check-format -o $mofile $pofile || fail=yes
+      ;;
+    *)
+      # An invalid pair must be rejected with exit status 1; the
+      # diagnostics are expected and therefore discarded.
+      ${MSGFMT} --check-format -o $mofile $pofile 2> /dev/null
+      test $? = 1 || fail=yes
+      ;;
+  esac
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat $pofile 1>&2
+    exit 1
+  fi
+done < f-a-2.data
+
+rm -fr $tmpfiles
+
+exit 0
--- /dev/null
+#! /bin/sh
+
+# Test of gettext facilities in the GNU awk language.
+# Assumes an fr_FR locale is installed.
+# Assumes the following packages are installed: gawk.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles prog.awk"
+cat <<\EOF > prog.awk
+BEGIN {
+ TEXTDOMAIN = "prog"
+ bindtextdomain ("./")
+
+ print _"'Your command, please?', asked the waiter."
+
+ printf dcngettext ("a piece of cake", "%d pieces of cake", n) "\n", n
+
+ printf _"%s is replaced by %s." "\n", "FF", "EUR"
+}
+EOF
+
+tmpfiles="$tmpfiles prog.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} -o prog.pot --omit-header --no-location prog.awk
+
+tmpfiles="$tmpfiles prog.ok"
+cat <<EOF > prog.ok
+msgid "'Your command, please?', asked the waiter."
+msgstr ""
+
+#, awk-format
+msgid "a piece of cake"
+msgid_plural "%d pieces of cake"
+msgstr[0] ""
+msgstr[1] ""
+
+#, awk-format
+msgid "%s is replaced by %s."
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} prog.ok prog.pot || exit 1
+
+tmpfiles="$tmpfiles fr.po"
+cat <<\EOF > fr.po
+msgid ""
+msgstr ""
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+msgid "'Your command, please?', asked the waiter."
+msgstr "«Votre commande, s'il vous plait», dit le garçon."
+
+# Les gateaux allemands sont les meilleurs du monde.
+#, awk-format
+msgid "a piece of cake"
+msgid_plural "%d pieces of cake"
+msgstr[0] "un morceau de gateau"
+msgstr[1] "%d morceaux de gateau"
+
+# Reverse the arguments.
+#, awk-format
+msgid "%s is replaced by %s."
+msgstr "%2$s remplace %1$s."
+EOF
+
+tmpfiles="$tmpfiles fr.po.new"
+: ${MSGMERGE=msgmerge}
+${MSGMERGE} -q -o fr.po.new fr.po prog.pot
+
+: ${DIFF=diff}
+${DIFF} fr.po fr.po.new || exit 1
+
+tmpfiles="$tmpfiles fr"
+test -d fr || mkdir fr
+test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES
+
+: ${MSGFMT=msgfmt}
+${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po
+
+tmpfiles="$tmpfiles prog.ok prog.out"
+: ${DIFF=diff}
+cat <<\EOF > prog.ok
+«Votre commande, s'il vous plait», dit le garçon.
+2 morceaux de gateau
+EUR remplace FF.
+EOF
+
+# Test for presence of gawk version 3.1.1 or newer.
+(gawk --version) >/dev/null 2>/dev/null \
+ || { echo "SKIP: lang-awk"; rm -fr $tmpfiles; exit 77; }
+case `gawk --version | sed -e 's/^[^0-9]*//'` in
+ 0.* | 1.* | 2.* | 3.0* | 3.1.0*)
+ echo "SKIP: lang-awk"; rm -fr $tmpfiles; exit 77;;
+esac
+
+LANGUAGE= LC_ALL=fr_FR gawk -v n=2 -f prog.awk > prog.out || exit 1
+${DIFF} prog.ok prog.out || exit 1
+
+rm -fr $tmpfiles
+
+exit 0