From: Bruno Haible Date: Tue, 12 Jun 2001 12:50:16 +0000 (+0000) Subject: New programs msgcat, msgconv, msgen, msggrep, msgsed. X-Git-Tag: v0.11~672 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=18bfb53d15e38e9e43d76a13caf6c3498412bf97;p=thirdparty%2Fgettext.git New programs msgcat, msgconv, msgen, msggrep, msgsed. --- diff --git a/ChangeLog b/ChangeLog index 8489fa3fb..03df4c289 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2001-06-10 Bruno Haible + + * configure.in: Bump version number to 0.11. + 2001-06-10 Bruno Haible * configure.in: Check for ssize_t, pid_t, posix_spawn, select. diff --git a/configure.in b/configure.in index ee7c87c30..be26b049a 100644 --- a/configure.in +++ b/configure.in @@ -1,8 +1,8 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.13) -AC_REVISION($Revision: 1.30 $) +AC_REVISION($Revision: 1.31 $) AC_INIT(src/msgfmt.c) -AM_INIT_AUTOMAKE(gettext, 0.10.38) +AM_INIT_AUTOMAKE(gettext, 0.11) RELEASE_DATE=2001-05-23 dnl in "date +%Y-%m-%d" format AM_CONFIG_HEADER(config.h) diff --git a/po/ChangeLog b/po/ChangeLog index 532af9f77..3b1094a13 100644 --- a/po/ChangeLog +++ b/po/ChangeLog @@ -1,3 +1,8 @@ +2001-06-10 Bruno Haible + + * POTFILES.in: Add src/msgcat.c, src/msgconv.c, src/msgen.c, + src/msggrep.c, src/msgl-charset.c, src/msgl-iconv.c, src/msgsed.c. 
+ 2001-06-10 Bruno Haible * POTFILES.in: Add lib/pipe-bidi.c, lib/pipe-in.c, lib/pipe-out.c, diff --git a/po/POTFILES.in b/po/POTFILES.in index 51dbcd52c..b8c171866 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -18,10 +18,17 @@ lib/xmalloc.c # Package source files src/gettext.c src/message.c +src/msgcat.c src/msgcmp.c src/msgcomm.c +src/msgconv.c +src/msgen.c src/msgfmt.c +src/msggrep.c +src/msgl-charset.c +src/msgl-iconv.c src/msgmerge.c +src/msgsed.c src/msgunfmt.c src/ngettext.c src/open-po.c diff --git a/src/ChangeLog b/src/ChangeLog index 0c5bc2dfe..1f243a59a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,35 @@ +2001-06-10 Bruno Haible + + * message.h (message_ty): New field 'tmp'. + (message_predicate_ty): New type. + (message_list_remove_if_not): New declaration. + * message.c (message_list_delete_nth): Comment out. + (message_list_remove_if_not): New function. + * msgl-charset.h: New file. + * msgl-charset.c: New file. + * msgl-iconv.h: New file. + * msgl-iconv.c: New file. + * msgcat.c: New file. + * msgconv.c: New file. + * msgen.c: New file. + * msggrep.c: New file. + * msgsed.c: New file. + * msgcomm.c (default_domain, output_dir): Remove variables. + (long_options): Remove --default-domain and --output-dir. Add + --output-file. + (main): Don't accept -a, -C, -d, -k, -l, -L, -m, -M, -p, -T, -x. + Use output_file instead of complexly computed file_name. + Use O(n) loop instead of O(n^2) loop for removing messages. + (usage): Don't document --default-domain and --output-dir. + (is_message_selected): New function. + * Makefile.am (bin_PROGRAMS): Add msgcat, msgconv, msgen, msggrep, + msgsed. + (noinst_HEADERS): Add msgl-charset.h, msgl-iconv.h. + (msgcat_SOURCES, msgconv_SOURCES, msgen_SOURCES, msggrep_SOURCES, + msgsed_SOURCES): New variables. + (msgcat_LDADD, msgconv_LDADD, msgen_LDADD, msggrep_LDADD, + msgsed_LDADD): New variables. + 2001-06-10 Bruno Haible * message.h (struct message_variant_ty): Remove type. 
diff --git a/src/Makefile.am b/src/Makefile.am index a4be1cd0e..482fd477c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,11 +19,13 @@ AUTOMAKE_OPTIONS = 1.2 gnits -bin_PROGRAMS = gettext ngettext msgcmp msgfmt msgmerge msgunfmt xgettext msgcomm +bin_PROGRAMS = gettext ngettext \ +msgcmp msgfmt msgmerge msgunfmt xgettext \ +msgcat msgcomm msgconv msgen msggrep msgsed noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \ po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \ -po-gram-gen.h po-hash-gen.h +po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h EXTRA_DIST = FILES @@ -50,8 +52,22 @@ msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \ write-po.c +msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-iconv.c msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c +msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-iconv.c +msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c +msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-charset.c +msgsed_SOURCES = msgsed.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-charset.c # Link dependencies. # po-lex.c and po.c may need -liconv. 
@@ -61,7 +77,12 @@ msgfmt_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgmerge_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgunfmt_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ xgettext_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msgcat_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgcomm_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msgconv_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msgen_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msgsed_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ BUILT_SOURCES = po-gram-gen.c po-hash-gen.c po-gram-gen.h po-hash-gen.h diff --git a/src/message.c b/src/message.c index e2cd57c1d..79e31f425 100644 --- a/src/message.c +++ b/src/message.c @@ -510,6 +510,7 @@ message_list_prepend (mlp, mp) } +#if 0 /* unused */ void message_list_delete_nth (mlp, n) message_list_ty *mlp; @@ -524,6 +525,21 @@ message_list_delete_nth (mlp, n) mlp->item[j - 1] = mlp->item[j]; mlp->nitems--; } +#endif + + +void +message_list_remove_if_not (mlp, predicate) + message_list_ty *mlp; + message_predicate_ty *predicate; +{ + size_t i, j; + + for (j = 0, i = 0; j < mlp->nitems; j++) + if (predicate (mlp->item[j])) + mlp->item[i++] = mlp->item[j]; + mlp->nitems = i; +} message_ty * diff --git a/src/message.h b/src/message.h index c591df217..bee267a9e 100644 --- a/src/message.h +++ b/src/message.h @@ -96,13 +96,16 @@ struct message_ty /* Do we want the string to be wrapped in the emitted PO file? */ enum is_wrap do_wrap; - /* Used for checking that messages have been used, in the msgcmp, - msgmerge and msgcomm programs. */ - int used; - /* If set the message is obsolete and while writing out it should be commented out. */ int obsolete; + + /* Used for checking that messages have been used, in the msgcmp, + msgmerge, msgcomm and msgcat programs. */ + int used; + + /* Used for looking up the target message, in the msgcat program. 
*/ + message_ty *tmp; }; extern message_ty * @@ -143,6 +146,10 @@ extern void message_list_prepend PARAMS ((message_list_ty *mlp, message_ty *mp)); extern void message_list_delete_nth PARAMS ((message_list_ty *mlp, size_t n)); +typedef int message_predicate_ty PARAMS ((const message_ty *mp)); +extern void + message_list_remove_if_not PARAMS ((message_list_ty *mlp, + message_predicate_ty *predicate)); extern message_ty * message_list_search PARAMS ((message_list_ty *mlp, const char *msgid)); extern message_ty * diff --git a/src/msgcat.c b/src/msgcat.c new file mode 100644 index 000000000..260f59844 --- /dev/null +++ b/src/msgcat.c @@ -0,0 +1,904 @@ +/* Concatenates several translation catalogs. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include "dir-list.h" +#include "error.h" +#include "xerror.h" +#include "progname.h" +#include "message.h" +#include "read-po.h" +#include "write-po.h" +#include "po-charset.h" +#include "msgl-iconv.h" +#include "system.h" +#include "libgettext.h" + +#define _(str) gettext (str) + + +/* Force output of PO file even if empty. */ +static int force_po; + +/* Target encoding. 
*/ +static const char *to_code; + +/* These variables control which messages are selected. */ +static int more_than = 0; +static int less_than = INT_MAX; + +/* If true, use the first available translation. + If false, merge all available translations into one and fuzzy it. */ +static int use_first; + +/* Long options. */ +static const struct option long_options[] = +{ + { "add-location", no_argument, &line_comment, 1 }, + { "directory", required_argument, NULL, 'D' }, + { "escape", no_argument, NULL, 'E' }, + { "files-from", required_argument, NULL, 'f' }, + { "force-po", no_argument, &force_po, 1 }, + { "help", no_argument, NULL, 'h' }, + { "indent", no_argument, NULL, 'i' }, + { "no-escape", no_argument, NULL, 'e' }, + { "no-location", no_argument, &line_comment, 0 }, + { "output-file", required_argument, NULL, 'o' }, + { "sort-by-file", no_argument, NULL, 'F' }, + { "sort-output", no_argument, NULL, 's' }, + { "strict", no_argument, NULL, 'S' }, + { "to-code", required_argument, NULL, 't' }, + { "unique", no_argument, NULL, 'u' }, + { "use-first", no_argument, &use_first, 1 }, + { "version", no_argument, NULL, 'V' }, + { "width", required_argument, NULL, 'w', }, + { "more-than", required_argument, NULL, '>', }, + { "less-than", required_argument, NULL, '<', }, + { NULL, 0, NULL, 0 } +}; + + +/* Prototypes for local functions. 
*/ +static void usage PARAMS ((int status)); +static string_list_ty *read_name_from_file PARAMS ((const char *file_name)); +static int is_message_selected PARAMS ((const message_ty *tmp)); +static int is_message_needed PARAMS ((const message_ty *tmp)); +static int is_message_first_needed PARAMS ((const message_ty *tmp)); +static msgdomain_list_ty * + catenate_msgdomain_list PARAMS ((string_list_ty *file_list, + const char *to_code)); + + +int +main (argc, argv) + int argc; + char **argv; +{ + int cnt; + int optchar; + int do_help; + int do_version; + char *output_file; + const char *files_from; + string_list_ty *file_list; + msgdomain_list_ty *result; + int sort_by_msgid = 0; + int sort_by_filepos = 0; + + /* Set program name for messages. */ + program_name = argv[0]; + error_print_progname = maybe_print_progname; + +#ifdef HAVE_SETLOCALE + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); +#endif + + /* Set the text message domain. */ + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Set default values for variables. */ + do_help = 0; + do_version = 0; + output_file = NULL; + files_from = NULL; + + while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:", + long_options, NULL)) != EOF) + switch (optchar) + { + case '\0': /* Long option. 
*/ + break; + + case '>': + { + int value; + char *endp; + value = strtol (optarg, &endp, 10); + if (endp != optarg) + more_than = value; + } + break; + + case '<': + { + int value; + char *endp; + value = strtol (optarg, &endp, 10); + if (endp != optarg) + less_than = value; + } + break; + + case 'D': + dir_list_append (optarg); + break; + + case 'e': + message_print_style_escape (0); + break; + + case 'E': + message_print_style_escape (1); + break; + + case 'f': + files_from = optarg; + break; + + case 'F': + sort_by_filepos = 1; + break; + + case 'h': + do_help = 1; + break; + + case 'i': + message_print_style_indent (); + break; + + case 'n': + line_comment = 1; + break; + + case 'o': + output_file = optarg; + break; + + case 's': + sort_by_msgid = 1; + break; + + case 'S': + message_print_style_uniforum (); + break; + + case 't': + to_code = optarg; + break; + + case 'u': + less_than = 2; + break; + + case 'V': + do_version = 1; + break; + + case 'w': + { + int value; + char *endp; + value = strtol (optarg, &endp, 10); + if (endp != optarg) + message_page_width_set (value); + } + break; + + default: + usage (EXIT_FAILURE); + /* NOTREACHED */ + } + + /* Verify selected options. */ + if (!line_comment && sort_by_filepos) + error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), + "--no-location", "--sort-by-file"); + + if (sort_by_msgid && sort_by_filepos) + error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), + "--sort-output", "--sort-by-file"); + + /* Version information requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), + "2001"); + printf (_("Written by %s.\n"), "Bruno Haible"); + exit (EXIT_SUCCESS); + } + + /* Help is requested. 
*/ + if (do_help) + usage (EXIT_SUCCESS); + + /* Determine list of files we have to process. */ + if (files_from != NULL) + file_list = read_name_from_file (files_from); + else + file_list = string_list_alloc (); + /* Append names from command line. */ + for (cnt = optind; cnt < argc; ++cnt) + string_list_append_unique (file_list, argv[cnt]); + + /* Check the message selection criteria for sanity. */ + if (more_than >= less_than || less_than < 2) + error (EXIT_FAILURE, 0, + _("impossible selection criteria specified (%d < n < %d)"), + more_than, less_than); + + /* Read input files, then filter, convert and merge messages. */ + result = catenate_msgdomain_list (file_list, to_code); + + string_list_free (file_list); + + /* Sorting the list of messages. */ + if (sort_by_filepos) + msgdomain_list_sort_by_filepos (result); + else if (sort_by_msgid) + msgdomain_list_sort_by_msgid (result); + + /* Write the PO file. */ + msgdomain_list_print (result, output_file, force_po, 0); + + exit (EXIT_SUCCESS); +} + + +/* Display usage information and exit. */ +static void +usage (status) + int status; +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + /* xgettext: no-wrap */ + printf (_("\ +Usage: %s [OPTION] [INPUTFILE]...\n\ +"), program_name); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Concatenates and merges the specified PO files.\n\ +Find messages which are common to two or more of the specified PO files.\n\ +By using the --more-than option, greater commonality may be requested\n\ +before messages are printed. Conversely, the --less-than option may be\n\ +used to specify less commonality before messages are printed (i.e.\n\ +--less-than=2 will only print the unique messages). 
Translations,\n\ +comments and extract comments will be cumulated, except that if --use-first\n\ +is specified, they will be taken from the first PO file to define them.\n\ +File positions from all PO files will be cumulated.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Input file location:\n\ + INPUTFILE ... input files\n\ + -f, --files-from=FILE get list of input files from FILE\n\ + -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\ +If input file is -, standard input is read.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output file location:\n\ + -o, --output-file=FILE write output to specified file\n\ +The results are written to standard output if no output file is specified\n\ +or if it is -.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Message selection:\n\ + -<, --less-than=NUMBER print messages with less than this many\n\ + definitions, defaults to infinite if not\n\ + set\n\ + ->, --more-than=NUMBER print messages with more than this many\n\ + definitions, defaults to 1 if not set\n\ + -u, --unique shorthand for --less-than=2, requests\n\ + that only unique messages be printed\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output details:\n\ + -t, --to-code=NAME encoding for output\n\ + --use-first use first available translation for each\n\ + message, don't merge several translations\n\ + -e, --no-escape do not use C escapes in output (default)\n\ + -E, --escape use C escapes in output, no extended chars\n\ + --force-po write PO file even if empty\n\ + -i, --indent write the .po file using indented style\n\ + --no-location do not write '#: filename:line' lines\n\ + -n, --add-location generate '#: filename:line' lines (default)\n\ + --strict write out strict Uniforum conforming .po file\n\ + -w, 
--width=NUMBER set output page width\n\ + -s, --sort-output generate sorted output and remove duplicates\n\ + -F, --sort-by-file sort output by file location\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Informative output:\n\ + -h, --help display this help and exit\n\ + -V, --version output version information and exit\n\ +")); + printf ("\n"); + fputs (_("Report bugs to .\n"), + stdout); + } + + exit (status); +} + + +/* Read list of files to process from file. */ +static string_list_ty * +read_name_from_file (file_name) + const char *file_name; +{ + size_t line_len = 0; + char *line_buf = NULL; + FILE *fp; + string_list_ty *result; + + if (strcmp (file_name, "-") == 0) + fp = stdin; + else + { + fp = fopen (file_name, "r"); + if (fp == NULL) + error (EXIT_FAILURE, errno, + _("error while opening \"%s\" for reading"), file_name); + } + + result = string_list_alloc (); + + while (!feof (fp)) + { + /* Read next line from file. */ + int len = getline (&line_buf, &line_len, fp); + + /* In case of an error leave loop. */ + if (len < 0) + break; + + /* Remove trailing '\n' and trailing whitespace. */ + if (len > 0 && line_buf[len - 1] == '\n') + line_buf[--len] = '\0'; + while (len > 0 + && (line_buf[len - 1] == ' ' + || line_buf[len - 1] == '\t' + || line_buf[len - 1] == '\r')) + line_buf[--len] = '\0'; + + /* Test if we have to ignore the line. */ + if (*line_buf == '\0' || *line_buf == '#') + continue; + + string_list_append_unique (result, line_buf); + } + + /* Free buffer allocated through getline. */ + if (line_buf != NULL) + free (line_buf); + + /* Close input stream. */ + if (fp != stdin) + fclose (fp); + + return result; +} + + +static int +is_message_selected (tmp) + const message_ty *tmp; +{ + int used = (tmp->used >= 0 ? tmp->used : - tmp->used); + + /* FIXME: Add tmp->msgid[0] == '\0' || ... here? 
*/ + return (used > more_than && used < less_than); +} + + +static int +is_message_needed (mp) + const message_ty *mp; +{ + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0') + /* Weak translation. Needed if there are only weak translations. */ + return mp->tmp->used < 0 && is_message_selected (mp->tmp); + else + /* Good translation. */ + return is_message_selected (mp->tmp); +} + + +/* The use_first logic. */ +static int +is_message_first_needed (mp) + const message_ty *mp; +{ + if (mp->tmp->obsolete && is_message_needed (mp)) + { + mp->tmp->obsolete = 0; + return 1; + } + else + return 0; +} + + +static msgdomain_list_ty * +catenate_msgdomain_list (file_list, to_code) + string_list_ty *file_list; + const char *to_code; +{ + const char * const *files = file_list->item; + size_t nfiles = file_list->nitems; + msgdomain_list_ty **mdlps; + const char ***canon_charsets; + const char ***identifications; + msgdomain_list_ty *total_mdlp; + const char *canon_to_code; + size_t n, j, k; + + /* Read input files. */ + mdlps = + (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *)); + for (n = 0; n < nfiles; n++) + mdlps[n] = read_po_file (files[n]); + + /* Determine the canonical name of each input file's encoding. 
*/ + canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + canon_charsets[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + const char *canon_from_code = NULL; + + if (mlp->nitems > 0) + { + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && mlp->item[j]->obsolete == 0) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *canon_charset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) alloca (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + canon_charset = po_charset_canonicalize (charset); + if (canon_charset == NULL) + error (EXIT_FAILURE, 0, + _("\ +present charset \"%s\" is not a portable encoding name"), + charset); + + if (canon_from_code == NULL) + canon_from_code = canon_charset; + else if (canon_from_code != canon_charset) + error (EXIT_FAILURE, 0, + _("\ +two different charsets \"%s\" and \"%s\" in input file"), + canon_from_code, canon_charset); + } + } + } + if (canon_from_code == NULL) + { + if (k == 0) + error (EXIT_FAILURE, 0, _("\ +input file `%s' doesn't contain a header entry with a charset specification"), + files[n]); + else + error (EXIT_FAILURE, 0, _("\ +domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), + mdlp->item[k]->domain, files[n]); + } + } + canon_charsets[n][k] = canon_from_code; + } + } + + /* Determine textual identifications of each file/domain combination. 
*/ + identifications = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + const char *filename = basename (files[n]); + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + identifications[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + char *project_id = NULL; + + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && mlp->item[j]->obsolete == 0) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *cp = strstr (header, "Project-Id-Version:"); + + if (cp != NULL) + { + const char *endp; + + cp += sizeof ("Project-Id-Version:") - 1; + + endp = strchr (cp, '\n'); + if (endp == NULL) + endp = cp + strlen (cp); + + while (cp < endp && *cp == ' ') + cp++; + + if (cp < endp) + { + size_t len = endp - cp; + project_id = (char *) xmalloc (len + 1); + memcpy (project_id, cp, len); + project_id[len] = '\0'; + } + break; + } + } + } + + identifications[n][k] = + (project_id != NULL + ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id) + : xasprintf ("%s (%s)", filename, project_id)) + : (k > 0 ? xasprintf ("%s:%s", filename, domain) + : xasprintf ("%s", filename))); + } + } + + /* Create list of resulting messages, but don't fill it. Only count + the number of translations for each message. + If for a message, there is at least one non-fuzzy, non-empty translation, + use only the non-fuzzy, non-empty translations. Otherwise use the + fuzzy or empty translations as well. 
*/ + total_mdlp = msgdomain_list_alloc (); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + message_list_ty *total_mlp; + + total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1); + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp; + + tmp = message_list_search (total_mlp, mp->msgid); + if (tmp == NULL) + { + tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, + &mp->pos); + tmp->is_c_format = 0; /* may be set to 1 later */ + tmp->do_wrap = 1; /* may be set to 0 later */ + tmp->obsolete = 1; /* may be set to 0 later */ + message_list_append (total_mlp, tmp); + } + + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) + || mp->msgstr[0] == '\0') + /* Weak translation. Counted as negative tmp->used. */ + { + if (tmp->used <= 0) + tmp->used--; + } + else + /* Good translation. Counted as positive tmp->used. */ + { + if (tmp->used < 0) + tmp->used = 0; + tmp->used++; + } + mp->tmp = tmp; + } + } + } + + /* Remove messages that are not used and need not be converted. */ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, + use_first + ? is_message_first_needed + : is_message_needed); + + /* If no messages are remaining, drop the charset. */ + if (mlp->nitems == 0) + canon_charsets[n][k] = NULL; + } + } + for (k = 0; k < total_mdlp->nitems; k++) + { + message_list_ty *mlp = total_mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, is_message_selected); + } + + /* Determine the target encoding for the remaining messages. */ + if (to_code != NULL) + { + /* Canonicalize target encoding. 
*/ + canon_to_code = po_charset_canonicalize (to_code); + if (canon_to_code == NULL) + error (EXIT_FAILURE, 0, + _("target charset \"%s\" is not a portable encoding name."), + to_code); + } + else + { + /* No target encoding was specified. Test whether the messages are + all in a single encoding. If so, conversion is not needed. */ + const char *first = NULL; + const char *second = NULL; + int with_UTF8 = 0; + + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + { + if (first == NULL) + first = canon_charsets[n][k]; + else if (canon_charsets[n][k] != first && second == NULL) + second = canon_charsets[n][k]; + + if (strcmp (canon_charsets[n][k], "UTF-8") == 0) + with_UTF8 = 1; + } + } + + if (second != NULL) + { + /* A conversion is needed. Warn the user since he hasn't asked + for it and might be surprised. */ + if (with_UTF8) + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, UTF-8 among others.\n\ +Converting the output to UTF-8.\n\ +"))); + else + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, %s and %s among others.\n\ +Converting the output to UTF-8.\n\ +To select a different output encoding, use the --to-code option.\n\ +"), first, second)); + canon_to_code = po_charset_canonicalize ("UTF-8"); + } + else + { + /* No conversion needed. */ + canon_to_code = NULL; + } + } + + /* Now convert the remaining messages to to_code. */ + if (canon_to_code != NULL) + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + iconv_message_list (mdlp->item[k]->messages, canon_to_code); + } + + /* Fill the resulting messages. 
*/ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp = mp->tmp; + size_t i; + + /* No need to discard unneeded weak translations here; + they have already been filtered out above. */ + if (use_first || tmp->used == 1 || tmp->used == -1) + { + /* Copy mp, as only message, into tmp. */ + tmp->msgstr = mp->msgstr; + tmp->msgstr_len = mp->msgstr_len; + tmp->pos = mp->pos; + if (mp->comment) + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + if (mp->comment_dot) + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + tmp->is_fuzzy = mp->is_fuzzy; + tmp->is_c_format = mp->is_c_format; + tmp->do_wrap = mp->do_wrap; + tmp->obsolete = mp->obsolete; + } + else + { + /* Copy mp, among others, into tmp. 
*/ + char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#", + identifications[n][k]); + size_t id_len = strlen (id); + const char *p; + const char *p_end; + const char *tp; + const char *tp_end; + char *new_msgstr; + size_t len; + char *np; + + if (tmp->msgstr == NULL) + tmp->pos = mp->pos; + + p_end = mp->msgstr + mp->msgstr_len; + tp_end = tmp->msgstr + tmp->msgstr_len; + len = mp->msgstr_len + tmp->msgstr_len; + for (p = mp->msgstr; p < p_end; p += strlen (p) + 1) + len += id_len + 2; + new_msgstr = (char *) xmalloc (len); + for (tp = tmp->msgstr, p = mp->msgstr, np = new_msgstr; + tp < tp_end || p < p_end; ) + { + if (tp < tp_end) + { + len = strlen (tp); + memcpy (np, tp, len); + np += len; + tp += len + 1; + } + if (p < p_end) + { + if (np > new_msgstr && np[-1] != '\0' + && np[-1] != '\n') + *np++ = '\n'; + memcpy (np, id, id_len); + np += id_len; + *np++ = '\n'; + len = strlen (p); + memcpy (np, p, len); + np += len; + p += len + 1; + } + *np++ = '\0'; + } + if (tmp->msgstr != NULL) + free ((char *) tmp->msgstr); + tmp->msgstr = new_msgstr; + tmp->msgstr_len = np - new_msgstr; + + if (mp->comment) + { + message_comment_append (tmp, id); + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + } + if (mp->comment_dot) + { + message_comment_dot_append (tmp, id); + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + } + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + tmp->is_fuzzy = 1; + if (mp->is_c_format) + tmp->is_c_format = 1; + if (!mp->do_wrap) + tmp->do_wrap = 0; + if (!mp->obsolete) + tmp->obsolete = 0; + } + } + } + } + + return total_mdlp; +} diff --git a/src/msgcomm.c b/src/msgcomm.c index 33c3434e8..a7c4c0948 100644 --- a/src/msgcomm.c +++ b/src/msgcomm.c @@ -55,15 +55,9 @@ /* If nonzero add comments for file name and line number for each msgid. 
*/ static int line_comment = 1; -/* Name of default domain file. If not set defaults to messages.po. */ -static const char *default_domain; - /* Force output of PO file even if empty. */ static int force_po; -/* Directory in which output files are created. */ -static char *output_dir; - /* If nonzero omit header with information about this run. */ static int omit_header; @@ -75,7 +69,6 @@ static int less_than = -1; static const struct option long_options[] = { { "add-location", no_argument, &line_comment, 1 }, - { "default-domain", required_argument, NULL, 'd' }, { "directory", required_argument, NULL, 'D' }, { "escape", no_argument, NULL, 'E' }, { "files-from", required_argument, NULL, 'f' }, @@ -85,8 +78,8 @@ static const struct option long_options[] = { "no-escape", no_argument, NULL, 'e' }, { "no-location", no_argument, &line_comment, 0 }, { "omit-header", no_argument, &omit_header, 1 }, - { "output", required_argument, NULL, 'o' }, - { "output-dir", required_argument, NULL, 'p' }, + { "output", required_argument, NULL, 'o' }, /* for backward compatibility */ + { "output-file", required_argument, NULL, 'o' }, { "sort-by-file", no_argument, NULL, 'F' }, { "sort-output", no_argument, NULL, 's' }, { "strict", no_argument, NULL, 'S' }, @@ -106,6 +99,7 @@ static void usage PARAMS ((int status)) #endif ; static string_list_ty *read_name_from_file PARAMS ((const char *file_name)); +static int is_message_selected PARAMS ((const message_ty *mp)); static void extract_constructor PARAMS ((po_ty *that)); static void extract_directive_domain PARAMS ((po_ty *that, char *name)); static void extract_directive_message PARAMS ((po_ty *that, char *msgid, @@ -137,11 +131,9 @@ main (argc, argv) msgdomain_list_ty *result; int sort_by_msgid = 0; int sort_by_filepos = 0; - const char *file_name; const char *files_from = NULL; string_list_ty *file_list; char *output_file = NULL; - size_t j; /* Set program name for messages. 
*/ set_program_name (argv[0]); @@ -156,11 +148,7 @@ main (argc, argv) bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); - /* Set initial value of variables. */ - default_domain = MESSAGE_DOMAIN_DEFAULT; - - while ((optchar = getopt_long (argc, argv, - "<:>:aCd:D:eEf:Fhik::l:L:m::M::no:p:sTuVw:x:", + while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:suVw:", long_options, NULL)) != EOF) switch (optchar) { @@ -184,9 +172,6 @@ main (argc, argv) less_than = value; } break; - case 'd': - default_domain = optarg; - break; case 'D': dir_list_append (optarg); break; @@ -214,25 +199,6 @@ main (argc, argv) case 'o': output_file = optarg; break; - case 'p': - { - size_t len = strlen (optarg); - - if (output_dir != NULL) - free (output_dir); - - if (optarg[len - 1] == '/') - output_dir = xstrdup (optarg); - else - { - asprintf (&output_dir, "%s/", optarg); - if (output_dir == NULL) - /* We are about to construct the absolute path to the - directory for the output files but asprintf failed. */ - error (EXIT_FAILURE, errno, _("while preparing output")); - } - } - break; case 's': sort_by_msgid = 1; break; @@ -286,25 +252,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ if (do_help) usage (EXIT_SUCCESS); - /* Default output directory is the current directory. */ - if (output_dir == NULL) - output_dir = "."; - - /* Construct the name of the output file. If the default domain has - the special name "-" we write to stdout. */ - if (output_file) - { - if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0) - file_name = xstrdup (output_file); - else - /* Please do NOT add a .po suffix! */ - file_name = concatenated_pathname (output_dir, output_file, NULL); - } - else if (strcmp (default_domain, "-") == 0) - file_name = "-"; - else - file_name = concatenated_pathname (output_dir, default_domain, ".po"); - /* Determine list of files we have to process. 
*/ if (files_from != NULL) file_list = read_name_from_file (files_from); @@ -341,17 +288,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ string_list_free (file_list); /* Remove messages which do not fit the criteria. */ - j = 0; - while (j < mlp->nitems) - { - message_ty *mp; - - mp = mlp->item[j]; - if (mp->used > more_than && mp->used < less_than) - ++j; - else - message_list_delete_nth (mlp, j); - } + message_list_remove_if_not (mlp, is_message_selected); /* Sorting the list of messages. */ if (sort_by_filepos) @@ -360,7 +297,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ msgdomain_list_sort_by_msgid (result); /* Write the PO file. */ - msgdomain_list_print (result, file_name, force_po, 0); + msgdomain_list_print (result, output_file, force_po, 0); exit (EXIT_SUCCESS); } @@ -410,10 +347,9 @@ If input file is -, standard input is read.\n\ /* xgettext: no-wrap */ printf (_("\ Output file location:\n\ - -d, --default-domain=NAME use NAME.po for output (instead of messages.po)\n\ - -o, --output=FILE write output to specified file\n\ - -p, --output-dir=DIR output files will be placed in directory DIR\n\ -If output file is -, output is written to standard output.\n\ + -o, --output-file=FILE write output to specified file\n\ +The results are written to standard output if no output file is specified\n\ +or if it is -.\n\ ")); printf ("\n"); /* xgettext: no-wrap */ @@ -518,6 +454,15 @@ read_name_from_file (file_name) } +static int +is_message_selected (mp) + const message_ty *mp; +{ + /* FIXME: Add mp->msgid[0] == '\0' || ... here? */ + return (mp->used > more_than && mp->used < less_than); +} + + typedef struct extract_class_ty extract_class_ty; struct extract_class_ty { diff --git a/src/msgconv.c b/src/msgconv.c new file mode 100644 index 000000000..4dfb316d3 --- /dev/null +++ b/src/msgconv.c @@ -0,0 +1,303 @@ +/* Converts a translation catalog to a different character encoding. 
+ Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include "dir-list.h" +#include "error.h" +#include "progname.h" +#include "message.h" +#include "read-po.h" +#include "write-po.h" +#include "msgl-iconv.h" +#include "system.h" +#include "libgettext.h" + +#define _(str) gettext (str) + +extern const char * locale_charset PARAMS ((void)); + + +/* Force output of PO file even if empty. */ +static int force_po; + +/* Target encoding. */ +static const char *to_code; + +/* Long options. 
*/
+static const struct option long_options[] =
+{
+  { "add-location", no_argument, &line_comment, 1 }, /* stored through the flag pointer */
+  { "directory", required_argument, NULL, 'D' },
+  { "escape", no_argument, NULL, 'E' },
+  { "force-po", no_argument, &force_po, 1 }, /* stored through the flag pointer */
+  { "help", no_argument, NULL, 'h' },
+  { "indent", no_argument, NULL, 'i' },
+  { "no-escape", no_argument, NULL, 'e' },
+  { "no-location", no_argument, &line_comment, 0 }, /* stored through the flag pointer */
+  { "output-file", required_argument, NULL, 'o' },
+  { "sort-by-file", no_argument, NULL, 'F' },
+  { "sort-output", no_argument, NULL, 's' },
+  { "strict", no_argument, NULL, 'S' },
+  { "to-code", required_argument, NULL, 't' },
+  { "version", no_argument, NULL, 'V' },
+  { "width", required_argument, NULL, 'w', },
+  { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.  */
+static void usage PARAMS ((int status));
+
+/* Entry point of msgconv.  Parses the command line, reads at most one PO
+   file (standard input when none is named), converts it to the encoding
+   given by --to-code (default: the value of locale_charset ()), optionally
+   sorts the result and prints it with msgdomain_list_print.  The function
+   always terminates through exit () or usage ().  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;
+  int do_help;
+  int do_version;
+  char *output_file;
+  const char *input_file;
+  msgdomain_list_ty *result;
+  int sort_by_filepos = 0;
+  int sort_by_msgid = 0;
+
+  /* Set program name for messages.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Set default values for variables.  */
+  do_help = 0;
+  do_version = 0;
+  output_file = NULL;
+  input_file = NULL;
+
+  while ((opt = getopt_long (argc, argv, "D:eEFhio:st:Vw:", long_options, NULL))
+         != EOF)
+    switch (opt)
+      {
+      case '\0':                /* Long option that only stored a flag value.  */
+        break;
+
+      case 'D':
+        dir_list_append (optarg);
+        break;
+
+      case 'e':
+        message_print_style_escape (0);
+        break;
+
+      case 'E':
+        message_print_style_escape (1);
+        break;
+
+      case 'F':
+        sort_by_filepos = 1;
+        break;
+
+      case 'h':
+        do_help = 1;
+        break;
+
+      case 'i':
+        message_print_style_indent ();
+        break;
+
+      case 'o':
+        output_file = optarg;
+        break;
+
+      case 's':
+        sort_by_msgid = 1;
+        break;
+
+      case 'S':                 /* only reachable as --strict: 'S' is not in the short option string */
+        message_print_style_uniforum ();
+        break;
+
+      case 't':
+        to_code = optarg;
+        break;
+
+      case 'V':
+        do_version = 1;
+        break;
+
+      case 'w':
+        {
+          int value;
+          char *endp;
+          value = strtol (optarg, &endp, 10);
+          if (endp != optarg)   /* silently ignore the option when no digits were parsed */
+            message_page_width_set (value);
+        }
+        break;
+
+      default:
+        usage (EXIT_FAILURE);
+        break;
+      }
+
+  /* Version information is requested.  */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Test whether we have a .po file name as argument.  No argument means
+     standard input ("-"); more than one argument is an error.  */
+  if (optind == argc)
+    input_file = "-";
+  else if (optind + 1 == argc)
+    input_file = argv[optind];
+  else
+    {
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed")); /* status 0: message only, usage () below does the exit */
+      usage (EXIT_FAILURE);
+    }
+
+  if (sort_by_msgid && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* Default for target encoding is current locale's encoding.  */
+  if (to_code == NULL)
+    to_code = locale_charset ();
+
+  /* Read input file and convert.  */
+  result = iconv_msgdomain_list (read_po_file (input_file), to_code);
+
+  /* Sort the results.  --sort-by-file is tested first; both options together
+     were already rejected above.  */
+  if (sort_by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (sort_by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Write the converted message list out.  */
+  msgdomain_list_print (result, output_file, force_po, 0);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Display usage information and exit.
+   When STATUS is not EXIT_SUCCESS only a one-line hint pointing to --help
+   is written to stderr; otherwise the complete help text is written to
+   stdout.  In both cases the process exits with STATUS.  */
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+             program_name);
+  else
+    {
+      /* xgettext: no-wrap */
+      printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Converts a translation catalog to a different character encoding.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Conversion target:\n\
+ -t, --to-code=NAME encoding for output\n\
+The default encoding is the current locale's encoding.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output details:\n\
+ -e, --no-escape do not use C escapes in output (default)\n\
+ -E, --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ -i, --indent indented output style\n\
+ --no-location suppress '#: filename:line' lines\n\
+ --add-location preserve '#: filename:line' lines (default)\n\
+ --strict strict Uniforum output style\n\
+ -w, --width=NUMBER set output page width\n\
+ -s, --sort-output generate sorted output and remove duplicates\n\
+ -F, --sort-by-file sort output by file location\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+      printf ("\n");
+      fputs (_("Report bugs to .\n"),
+             stdout);
+    }
+
+  exit (status);
+}
diff --git a/src/msgen.c b/src/msgen.c
new file mode 100644
index 000000000..75670b778
--- /dev/null
+++ b/src/msgen.c
@@ -0,0 +1,327 @@
+/* Creates an English translation catalog.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include
+#include
+#include
+#include
+
+#include "dir-list.h"
+#include "error.h"
+#include "progname.h"
+#include "message.h"
+#include "read-po.h"
+#include "write-po.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Force output of PO file even if empty.  */
+static int force_po;
+
+/* Long options.
*/ +static const struct option long_options[] = +{ + { "add-location", no_argument, &line_comment, 1 }, + { "directory", required_argument, NULL, 'D' }, + { "escape", no_argument, NULL, 'E' }, + { "force-po", no_argument, &force_po, 1 }, + { "help", no_argument, NULL, 'h' }, + { "indent", no_argument, NULL, 'i' }, + { "no-escape", no_argument, NULL, 'e' }, + { "no-location", no_argument, &line_comment, 0 }, + { "output-file", required_argument, NULL, 'o' }, + { "sort-by-file", no_argument, NULL, 'F' }, + { "sort-output", no_argument, NULL, 's' }, + { "strict", no_argument, NULL, 'S' }, + { "version", no_argument, NULL, 'V' }, + { "width", required_argument, NULL, 'w', }, + { NULL, 0, NULL, 0 } +}; + + +/* Prototypes for local functions. */ +static void usage PARAMS ((int status)); +static msgdomain_list_ty *english PARAMS ((msgdomain_list_ty *mdlp)); + + +int +main (argc, argv) + int argc; + char **argv; +{ + int opt; + int do_help; + int do_version; + char *output_file; + msgdomain_list_ty *result; + int sort_by_filepos = 0; + int sort_by_msgid = 0; + + /* Set program name for messages. */ + program_name = argv[0]; + error_print_progname = maybe_print_progname; + +#ifdef HAVE_SETLOCALE + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); +#endif + + /* Set the text message domain. */ + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Set default values for variables. */ + do_help = 0; + do_version = 0; + output_file = NULL; + + while ((opt = getopt_long (argc, argv, "D:eEFhio:sVw:", long_options, NULL)) + != EOF) + switch (opt) + { + case '\0': /* Long option. 
*/ + break; + + case 'D': + dir_list_append (optarg); + break; + + case 'e': + message_print_style_escape (0); + break; + + case 'E': + message_print_style_escape (1); + break; + + case 'F': + sort_by_filepos = 1; + break; + + case 'h': + do_help = 1; + break; + + case 'i': + message_print_style_indent (); + break; + + case 'o': + output_file = optarg; + break; + + case 's': + sort_by_msgid = 1; + break; + + case 'S': + message_print_style_uniforum (); + break; + + case 'V': + do_version = 1; + break; + + case 'w': + { + int value; + char *endp; + value = strtol (optarg, &endp, 10); + if (endp != optarg) + message_page_width_set (value); + } + break; + + default: + usage (EXIT_FAILURE); + break; + } + + /* Version information is requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), + "2001"); + printf (_("Written by %s.\n"), "Bruno Haible"); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + /* Test whether we have an .po file name as argument. */ + if (optind >= argc) + { + error (EXIT_SUCCESS, 0, _("no input file given")); + usage (EXIT_FAILURE); + } + if (optind + 1 != argc) + { + error (EXIT_SUCCESS, 0, _("exactly one input file required")); + usage (EXIT_FAILURE); + } + + if (sort_by_msgid && sort_by_filepos) + error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), + "--sort-output", "--sort-by-file"); + + /* Read input file and add English translations. */ + result = english (read_po_file (argv[optind])); + + /* Sort the results. 
*/ + if (sort_by_filepos) + msgdomain_list_sort_by_filepos (result); + else if (sort_by_msgid) + msgdomain_list_sort_by_msgid (result); + + /* Write the merged message list out. */ + msgdomain_list_print (result, output_file, force_po, 0); + + exit (EXIT_SUCCESS); +} + + +/* Display usage information and exit. */ +static void +usage (status) + int status; +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + /* xgettext: no-wrap */ + printf (_("\ +Usage: %s [OPTION] INPUTFILE\n\ +"), program_name); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Creates an English translation catalog. The input file is the last\n\ +created English PO file, or a PO Template file (generally created by\n\ +xgettext). Untranslated entries are assigned a translation that is\n\ +identical to the msgid, and are marked fuzzy.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Input file location:\n\ + INPUTFILE input PO or POT file\n\ + -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\ +If input file is -, standard input is read.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output file location:\n\ + -o, --output-file=FILE write output to specified file\n\ +The results are written to standard output if no output file is specified\n\ +or if it is -.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output details:\n\ + -e, --no-escape do not use C escapes in output (default)\n\ + -E, --escape use C escapes in output, no extended chars\n\ + --force-po write PO file even if empty\n\ + -i, --indent indented output style\n\ + --no-location suppress '#: filename:line' lines\n\ + --add-location preserve '#: filename:line' lines (default)\n\ + --strict strict Uniforum output style\n\ + -w, 
--width=NUMBER set output page width\n\ + -s, --sort-output generate sorted output and remove duplicates\n\ + -F, --sort-by-file sort output by file location\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Informative output:\n\ + -h, --help display this help and exit\n\ + -V, --version output version information and exit\n\ +")); + printf ("\n"); + fputs (_("Report bugs to .\n"), + stdout); + } + + exit (status); +} + + +static msgdomain_list_ty * +english (mdlp) + msgdomain_list_ty *mdlp; +{ + size_t j, k; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + + if (mp->msgid_plural == NULL) + { + if (mp->msgstr_len == 1 && mp->msgstr[0] == '\0') + { + mp->msgstr = mp->msgid; /* no need for xstrdup */ + mp->msgstr_len = strlen (mp->msgid) + 1; + } + } + else + { + if (mp->msgstr_len == 2 + && mp->msgstr[0] == '\0' && mp->msgstr[1] == '\0') + { + size_t len0 = strlen (mp->msgid) + 1; + size_t len1 = strlen (mp->msgid_plural) + 1; + char *cp = (char *) xmalloc (len0 + len1); + memcpy (cp, mp->msgid, len0); + memcpy (cp + len0, mp->msgid_plural, len1); + mp->msgstr = cp; + mp->msgstr_len = len0 + len1; + } + } + } + } + + return mdlp; +} diff --git a/src/msggrep.c b/src/msggrep.c new file mode 100644 index 000000000..576c42300 --- /dev/null +++ b/src/msggrep.c @@ -0,0 +1,579 @@ +/* Extract some translations of a translation catalog. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_UNISTD_H +# include +#endif + +#ifdef HAVE_LIMITS_H +# include +#endif + +#include "dir-list.h" +#include "error.h" +#include "progname.h" +#include "message.h" +#include "read-po.h" +#include "write-po.h" +#include "str-list.h" +#include "msgl-charset.h" +#include "system.h" +#include "full-write.h" +#include "findprog.h" +#include "pipe.h" +#include "wait-process.h" +#include "libgettext.h" + +#define _(str) gettext (str) + +/* Force output of PO file even if empty. */ +static int force_po; + +/* Selected source files. */ +static string_list_ty *location_files; + +/* Selected domain names. */ +static string_list_ty *domain_names; + +/* Arguments to be passed to the grep subprocesses. */ +static string_list_ty *grep_args[2]; + +/* Pathname of the grep program. */ +static const char *grep_path; + +/* Argument lists for the grep program. */ +static char **grep_argv[2]; + +/* Long options. 
*/
+static const struct option long_options[] =
+{
+  { "add-location", no_argument, &line_comment, 1 }, /* stored through the flag pointer */
+  { "directory", required_argument, NULL, 'D' },
+  { "domain", required_argument, NULL, 'M' },
+  { "escape", no_argument, NULL, CHAR_MAX + 1 }, /* CHAR_MAX + n: long-only options, outside the range of short option characters */
+  { "extended-regexp", no_argument, NULL, 'E' },
+  { "file", required_argument, NULL, 'f' },
+  { "fixed-strings", no_argument, NULL, 'F' },
+  { "force-po", no_argument, &force_po, 1 }, /* stored through the flag pointer */
+  { "help", no_argument, NULL, 'h' },
+  { "ignore-case", no_argument, NULL, 'i' },
+  { "indent", no_argument, NULL, CHAR_MAX + 2 },
+  { "location", required_argument, NULL, 'N' },
+  { "msgid", no_argument, NULL, 'K' },
+  { "msgstr", no_argument, NULL, 'T' },
+  { "no-escape", no_argument, NULL, CHAR_MAX + 3 },
+  { "no-location", no_argument, &line_comment, 0 }, /* stored through the flag pointer */
+  { "output-file", required_argument, NULL, 'o' },
+  { "regexp", required_argument, NULL, 'e' },
+  { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 },
+  { "sort-output", no_argument, NULL, CHAR_MAX + 5 },
+  { "strict", no_argument, NULL, 'S' },
+  { "version", no_argument, NULL, 'V' },
+  { "width", required_argument, NULL, 'w' },
+  { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.  */
+static void no_pass PARAMS ((int opt));
+static void usage PARAMS ((int status));
+#ifdef EINTR
+static inline int nonintr_close PARAMS ((int fd));
+#endif
+static int is_string_selected PARAMS ((int grep_pass, const char *str,
+                                       size_t len));
+static int is_message_selected PARAMS ((const message_ty *mp));
+static void process_message_list PARAMS ((const char *domain,
+                                          message_list_ty *mlp));
+static msgdomain_list_ty *
+       process_msgdomain_list PARAMS ((msgdomain_list_ty *mdlp));
+
+/* Entry point of msggrep.  Parses the command line, reads at most one PO
+   file (standard input when none is named), builds one grep argument
+   vector per pass (0 = msgid patterns after -K, 1 = msgstr patterns after
+   -T), filters the catalog through process_msgdomain_list, optionally sorts
+   it and prints it.  The grep options -e/-E/-f/-F/-i are appended to the
+   pass selected by the most recent -K or -T, so their order on the command
+   line matters.  The function always terminates through exit () or
+   usage ().  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;
+  int do_help;
+  int do_version;
+  char *output_file;
+  const char *input_file;
+  int grep_pass;
+  msgdomain_list_ty *result;
+  int sort_by_filepos = 0;
+  int sort_by_msgid = 0;
+  size_t i;
+
+  /* Set program name for messages.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Set default values for variables.  grep_pass == -1 means that neither
+     -K nor -T has been seen yet.  */
+  do_help = 0;
+  do_version = 0;
+  output_file = NULL;
+  input_file = NULL;
+  grep_pass = -1;
+  location_files = string_list_alloc ();
+  domain_names = string_list_alloc ();
+  grep_args[0] = string_list_alloc ();
+  grep_args[1] = string_list_alloc ();
+
+  while ((opt = getopt_long (argc, argv, "D:e:Ef:FhiKM:N:o:TVw:",
+                             long_options, NULL))
+         != EOF)
+    switch (opt)
+      {
+      case '\0':                /* Long option that only stored a flag value.  */
+        break;
+
+      case 'D':
+        dir_list_append (optarg);
+        break;
+
+      case 'e':
+        if (grep_pass < 0)
+          no_pass (opt);        /* exits through usage () */
+        string_list_append (grep_args[grep_pass], "-e");
+        string_list_append (grep_args[grep_pass], optarg);
+        break;
+
+      case 'E':
+        if (grep_pass < 0)
+          no_pass (opt);
+        string_list_append (grep_args[grep_pass], "-E");
+        break;
+
+      case 'f':
+        if (grep_pass < 0)
+          no_pass (opt);
+        string_list_append (grep_args[grep_pass], "-f");
+        string_list_append (grep_args[grep_pass], optarg);
+        break;
+
+      case 'F':
+        if (grep_pass < 0)
+          no_pass (opt);
+        string_list_append (grep_args[grep_pass], "-F");
+        break;
+
+      case 'h':
+        do_help = 1;
+        break;
+
+      case 'i':
+        if (grep_pass < 0)
+          no_pass (opt);
+        string_list_append (grep_args[grep_pass], "-i");
+        break;
+
+      case 'K':                 /* following grep options apply to msgid/msgid_plural */
+        grep_pass = 0;
+        break;
+
+      case 'M':
+        string_list_append (domain_names, optarg);
+        break;
+
+      case 'N':
+        string_list_append (location_files, optarg);
+        break;
+
+      case 'o':
+        output_file = optarg;
+        break;
+
+      case 'S':                 /* only reachable as --strict: 'S' is not in the short option string */
+        message_print_style_uniforum ();
+        break;
+
+      case 'T':                 /* following grep options apply to msgstr */
+        grep_pass = 1;
+        break;
+
+      case 'V':
+        do_version = 1;
+        break;
+
+      case 'w':
+        {
+          int value;
+          char *endp;
+          value = strtol (optarg, &endp, 10);
+          if (endp != optarg)   /* silently ignore the option when no digits were parsed */
+            message_page_width_set (value);
+        }
+        break;
+
+      case CHAR_MAX + 1:        /* --escape */
+        message_print_style_escape (1);
+        break;
+
+      case CHAR_MAX + 2:        /* --indent */
+        message_print_style_indent ();
+        break;
+
+      case CHAR_MAX + 3:        /* --no-escape */
+        message_print_style_escape (0);
+        break;
+
+      case CHAR_MAX + 4:        /* --sort-by-file */
+        sort_by_filepos = 1;
+        break;
+
+      case CHAR_MAX + 5:        /* --sort-output */
+        sort_by_msgid = 1;
+        break;
+
+      default:
+        usage (EXIT_FAILURE);
+        break;
+      }
+
+  /* Version information is requested.  */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Test whether we have a .po file name as argument.  No argument means
+     standard input ("-"); more than one argument is an error.  */
+  if (optind == argc)
+    input_file = "-";
+  else if (optind + 1 == argc)
+    input_file = argv[optind];
+  else
+    {
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed")); /* status 0: message only, usage () below does the exit */
+      usage (EXIT_FAILURE);
+    }
+
+  if (sort_by_msgid && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* Read input file.  */
+  result = read_po_file (input_file);
+
+  if (grep_args[0]->nitems > 0 || grep_args[1]->nitems > 0)
+    {
+      /* Warn if the current locale is not suitable for this PO file.  */
+      compare_po_locale_charsets (result);
+
+      /* Attempt to locate the 'grep' program.
+         This is an optimization, to avoid that spawn/exec searches the PATH
+         on every call.  */
+      grep_path = find_in_path ("grep");
+    }
+
+  /* Build argument lists for the 'grep' program.  Each vector is
+     { grep_path, "-q", collected options..., NULL }, hence nitems + 3
+     slots.  A pass without collected options keeps a NULL grep_argv entry;
+     is_string_selected never spawns grep for such a pass.  */
+  for (grep_pass = 0; grep_pass < 2; grep_pass++)
+    if (grep_args[grep_pass]->nitems > 0)
+      {
+        string_list_ty *args = grep_args[grep_pass];
+
+        grep_argv[grep_pass] =
+          (char **) xmalloc ((2 + args->nitems + 1) * sizeof (char *));
+        grep_argv[grep_pass][0] = (char *) grep_path;
+        grep_argv[grep_pass][1] = "-q";
+        for (i = 2; i <= args->nitems + 1; i++)
+          grep_argv[grep_pass][i] = (char *) args->item[i - 2];
+        grep_argv[grep_pass][i] = NULL; /* i == nitems + 2 here, the last allocated slot */
+      }
+
+  /* Select the messages.  */
+  result = process_msgdomain_list (result);
+
+  /* Sort the results.  --sort-by-file is tested first; both options together
+     were already rejected above.  */
+  if (sort_by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (sort_by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Write the selected message list out.  */
+  msgdomain_list_print (result, output_file, force_po, 0);
+
+  exit (EXIT_SUCCESS);
+}
+
+/* Complain that the grep option character OPT was given before -K or -T
+   selected a pass, then terminate through usage (EXIT_FAILURE).  */
+static void
+no_pass (opt)
+     int opt;
+{
+  error (EXIT_SUCCESS, 0,
+         _("option '%c' cannot be used before 'K' or 'T' has been specified"),
+         opt);
+  usage (EXIT_FAILURE);
+}
+
+
+/* Display usage information and exit.
*/
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)   /* error path: only a one-line hint, on stderr */
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+             program_name);
+  else                          /* --help: the complete help text, on stdout */
+    {
+      /* xgettext: no-wrap */
+      printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Extracts all messages of a translation catalog that match a given pattern\n\
+or belong to some given source files.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Message selection:\n\
+ [-N SOURCEFILE]... [-M DOMAINNAME]... [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\
+A message is selected if it comes from one of the specified source files,\n\
+or if it comes from one of the specified domains,\n\
+or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
+or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN.\n\
+PATTERNs are basic regular expressions by default, or extended regular\n\
+expressions if -E is given, or fixed strings if -F is given.\n\
+ -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
+ -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
+ -K, --msgid start of patterns for the msgid\n\
+ -T, --msgstr start of patterns for the msgstr\n\
+ -E, --extended-regexp PATTERN is an extended regular expression\n\
+ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
+ -e, --regexp=PATTERN use PATTERN as a regular expression\n\
+ -f, --file=FILE obtain PATTERN from FILE\n\
+ -i, --ignore-case ignore case distinctions\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output details:\n\
+ --no-escape do not use C escapes in output (default)\n\
+ --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ --indent indented output style\n\
+ --no-location suppress '#: filename:line' lines\n\
+ --add-location preserve '#: filename:line' lines (default)\n\
+ --strict strict Uniforum output style\n\
+ -w, --width=NUMBER set output page width\n\
+ --sort-output generate sorted output and remove duplicates\n\
+ --sort-by-file sort output by file location\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+      printf ("\n");
+      fputs (_("Report bugs to .\n"),
+             stdout);
+    }
+
+  exit (status);                /* reached on both paths; usage () never returns */
+}
+
+
+#ifdef EINTR
+
+/* EINTR handling for close().
+   These functions can return -1/EINTR even though we don't have any
+   signal handlers set up, namely when we get interrupted via SIGSTOP.
+   nonintr_close therefore repeats close (FD) for as long as it fails with
+   EINTR and returns the result of the last attempt.  */
+
+static inline int
+nonintr_close (fd)
+     int fd;
+{
+  int retval;
+
+  do
+    retval = close (fd);        /* still the real close (): the macro is defined only below */
+  while (retval < 0 && errno == EINTR);
+
+  return retval;
+}
+#define close nonintr_close     /* from here on, close () in this file means the retrying wrapper */
+
+#endif
+
+
+/* Process a string STR of size LEN bytes through grep, and return nonzero
+   if it matches.
+   GREP_PASS selects the argument vector: 0 for the msgid patterns, 1 for
+   the msgstr patterns.  If no grep options were collected for that pass,
+   0 is returned without starting a subprocess.  Otherwise one grep
+   subprocess is started per call, STR is written to it, and a match is
+   reported when the subprocess exits with status 0.  A short or failed
+   write terminates the program.
+   NOTE(review): grep is started with -q (see the grep_argv construction in
+   main) and may exit before it has read all of STR; confirm that
+   create_pipe_out/full_write cope with SIGPIPE/EPIPE in that case.  */
+static int
+is_string_selected (grep_pass, str, len)
+     int grep_pass;
+     const char *str;
+     size_t len;
+{
+  if (grep_args[grep_pass]->nitems > 0)
+    {
+      pid_t child;
+      int fd[1];
+      ssize_t nwritten;
+      int exitstatus;
+
+      /* Open a pipe to a grep subprocess.  The "/dev/null" argument is
+         presumably where the subprocess's output goes -- TODO confirm
+         against pipe.h.  */
+      child = create_pipe_out ("grep", grep_path, grep_argv[grep_pass],
+                               "/dev/null", fd);
+
+      nwritten = full_write (fd[0], str, len);
+      if (nwritten != (ssize_t) len)
+        error (EXIT_FAILURE, errno,
+               _("write to grep subprocess failed"));
+
+      close (fd[0]);            /* closing the pipe lets grep see end of input */
+
+      /* Remove zombie process from process list, and retrieve exit status.  */
+      exitstatus = wait_subprocess (child, "grep");
+      return (exitstatus == 0);
+    }
+  else
+    return 0;
+}
+
+
+/* Return nonzero if a message matches.
+   The tests run in this order and stop at the first hit: header entry
+   (empty msgid), source file named with -N, msgid and msgid_plural against
+   the -K patterns, each NUL-delimited part of msgstr against the -T
+   patterns.  Every pattern test may start a grep subprocess.  */
+static int
+is_message_selected (mp)
+     const message_ty *mp;
+{
+  size_t i;
+  const char *msgstr;
+  size_t msgstr_len;
+  const char *p;
+
+  /* Always keep the header entry.  */
+  if (mp->msgid[0] == '\0')
+    return 1;
+
+  /* Test whether one of mp->filepos[] is selected.  */
+  for (i = 0; i < mp->filepos_count; i++)
+    if (string_list_member (location_files, mp->filepos[i].file_name))
+      return 1;
+
+  /* Test msgid and msgid_plural using the --msgid arguments.  */
+  if (is_string_selected (0, mp->msgid, strlen (mp->msgid)))
+    return 1;
+  if (mp->msgid_plural != NULL
+      && is_string_selected (0, mp->msgid_plural, strlen (mp->msgid_plural)))
+    return 1;
+
+  /* Test msgstr using the --msgstr arguments.  */
+  msgstr = mp->msgstr;
+  msgstr_len = mp->msgstr_len;
+  /* Process each NUL delimited substring separately.  */
+  for (p = msgstr; p < msgstr + msgstr_len; )
+    {
+      size_t length = strlen (p);
+
+      if (is_string_selected (1, p, length))
+        return 1;
+
+      p += length + 1;          /* step over the string and its terminating NUL */
+    }
+
+  return 0;
+}
+
+/* Filter the message list MLP of domain DOMAIN in place.  If DOMAIN is one
+   of the names collected in domain_names, every message is kept; otherwise
+   only the messages accepted by is_message_selected remain.  */
+static void
+process_message_list (domain, mlp)
+     const char *domain;
+     message_list_ty *mlp;
+{
+  if (string_list_member (domain_names, domain))
+    /* Keep all the messages in the list.  */
+    ;
+  else
+    /* Keep only the selected messages.  */
+    message_list_remove_if_not (mlp, is_message_selected);
+}
+
+/* Apply process_message_list to every domain of MDLP and return MDLP; the
+   lists are filtered in place, no copy is made.  */
+static msgdomain_list_ty *
+process_msgdomain_list (mdlp)
+     msgdomain_list_ty *mdlp;
+{
+  size_t k;
+
+  for (k = 0; k < mdlp->nitems; k++)
+    process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages);
+
+  return mdlp;
+}
diff --git a/src/msgl-charset.c b/src/msgl-charset.c
new file mode 100644
index 000000000..ccc7c840e
--- /dev/null
+++ b/src/msgl-charset.c
@@ -0,0 +1,128 @@
+/* Message list charset and locale charset handling.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include
+#include
+
+#include "po-charset.h"
+#include "msgl-charset.h"
+#include "error.h"
+#include "progname.h"
+#include "xerror.h"
+#include "message.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+extern const char * locale_charset PARAMS ((void));
+
+/* Warn if the charset of the current locale differs from the charset
+   declared in the header entries of MDLP.
+   For every non-obsolete header entry (empty msgid) whose msgstr contains
+   "charset=", the declared name is canonicalized and compared with the
+   canonicalized locale charset.  A header charset that cannot be
+   canonicalized is a fatal error.  A mismatch produces a multi-line
+   warning listing workarounds.  If the locale charset itself cannot be
+   canonicalized and no mismatch warning was issued, a separate warning is
+   printed.  MDLP is not modified.
+   NOTE(review): canonical names are compared with != on the pointers, so
+   this presumably relies on po_charset_canonicalize() returning one shared
+   string per charset -- confirm in po-charset.c.  */
+void
+compare_po_locale_charsets (mdlp)
+     const msgdomain_list_ty *mdlp;
+{
+  const char *locale_code;
+  const char *canon_locale_code;
+  int warned;
+  size_t j, k;
+
+  /* Check whether the locale encoding and the PO file's encoding are the
+     same.  Otherwise emit a warning.  */
+  locale_code = locale_charset ();
+  canon_locale_code = po_charset_canonicalize (locale_code);
+  warned = 0;
+  for (k = 0; k < mdlp->nitems; k++)
+    {
+      const message_list_ty *mlp = mdlp->item[k]->messages;
+
+      for (j = 0; j < mlp->nitems; j++)
+        if (mlp->item[j]->msgid[0] == '\0' && mlp->item[j]->obsolete == 0)
+          {
+            const char *header = mlp->item[j]->msgstr;
+
+            if (header != NULL)
+              {
+                const char *charsetstr = strstr (header, "charset=");
+
+                if (charsetstr != NULL)
+                  {
+                    size_t len;
+                    char *charset;
+                    const char *canon_charset;
+
+                    /* Copy the charset name, which ends at the first
+                       blank, tab or newline, into a NUL terminated
+                       stack buffer.  */
+                    charsetstr += strlen ("charset=");
+                    len = strcspn (charsetstr, " \t\n");
+                    charset = (char *) alloca (len + 1);
+                    memcpy (charset, charsetstr, len);
+                    charset[len] = '\0';
+
+                    canon_charset = po_charset_canonicalize (charset);
+                    if (canon_charset == NULL)
+                      error (EXIT_FAILURE, 0,
+                             _("\
+present charset \"%s\" is not a portable encoding name"),
+                             charset);
+                    if (canon_locale_code != canon_charset)
+                      {
+                        multiline_warning (xasprintf (_("warning: ")),
+                                           xasprintf (_("\
+Locale charset \"%s\" is different from\n\
+input file charset \"%s\".\n\
+Output of '%s' might be incorrect.\n\
+Possible workarounds are:\n\
+"), locale_code, canon_charset, basename (program_name)));
+                        multiline_warning (NULL,
+                                           xasprintf (_("\
+- Set LC_ALL to a locale with encoding %s.\n\
+"), canon_charset));
+                        /* Converting to the locale charset is only
+                           possible if that charset has a canonical name.  */
+                        if (canon_locale_code != NULL)
+                          multiline_warning (NULL,
+                                             xasprintf (_("\
+- Convert the translation catalog to %s using 'msgconv',\n\
+ then apply '%s',\n\
+ then convert back to %s using 'msgconv'.\n\
+"), canon_locale_code, basename (program_name), canon_charset));
+                        /* Suggest the UTF-8 detour only when neither side
+                           is UTF-8 already.  */
+                        if (strcmp (canon_charset, "UTF-8") != 0
+                            && (canon_locale_code == NULL
+                                || strcmp (canon_locale_code, "UTF-8") != 0))
+                          multiline_warning (NULL,
+                                             xasprintf (_("\
+- Set LC_ALL to a locale with encoding %s,\n\
+ convert the translation catalog to %s using 'msgconv',\n\
+ then apply '%s',\n\
+ then convert back to %s using 'msgconv'.\n\
+"), "UTF-8", "UTF-8", basename (program_name), canon_charset));
+                        warned = 1;
+                      }
+                  }
+              }
+          }
+    }
+  if (canon_locale_code == NULL && !warned)
+    multiline_warning (xasprintf (_("warning: ")),
+                       xasprintf (_("\
+Locale charset \"%s\" is not a portable encoding name.\n\
+Output of '%s' might be incorrect.\n\
+A possible workaround is to set LC_ALL=C.\n\
+"), locale_code, basename (program_name)));
+}
diff --git a/src/msgl-charset.h b/src/msgl-charset.h
new file mode 100644
index 000000000..6c29df23c
--- /dev/null
+++ b/src/msgl-charset.h
@@ -0,0 +1,27 @@
+/* Message list charset and locale charset handling.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ + +#ifndef _MSGL_CHARSET_H +#define _MSGL_CHARSET_H + +#include "message.h" + +extern void + compare_po_locale_charsets PARAMS ((const msgdomain_list_ty *mdlp)); + +#endif /* _MSGL_CHARSET_H */ diff --git a/src/msgl-iconv.c b/src/msgl-iconv.c new file mode 100644 index 000000000..4ac759e6c --- /dev/null +++ b/src/msgl-iconv.c @@ -0,0 +1,377 @@ +/* Message list charset and locale charset handling. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +#if HAVE_ICONV +# include +#endif + +#include "msgl-iconv.h" +#include "error.h" +#include "progname.h" +#include "message.h" +#include "po-charset.h" +#include "system.h" +#include "libgettext.h" + +#define _(str) gettext (str) + + +/* Prototypes for local functions. */ +#if HAVE_ICONV +static int iconv_string PARAMS ((iconv_t cd, + const char *start, const char *end, + char **resultp, size_t *lengthp)); +static const char *convert_string PARAMS ((iconv_t cd, const char *string)); +static void convert_string_list PARAMS ((iconv_t cd, string_list_ty *slp)); +static void convert_msgstr PARAMS ((iconv_t cd, message_ty *mp)); +#endif + + +#if HAVE_ICONV + +/* Converts an entire string from one encoding to another, using iconv. 
+   Return value: 0 if successful, otherwise -1 and errno set.
+   The bytes from START up to (not including) END are converted with CD.
+   *RESULTP is resized with xrealloc() to exactly the converted size (so
+   it must be NULL or a malloc()ed pointer on entry) and that size is
+   stored in *LENGTHP.
+   The conversion runs twice: a first pass into a scratch buffer only
+   counts the output bytes, a second pass writes into the final buffer.  */
+static int
+iconv_string (cd, start, end, resultp, lengthp)
+     iconv_t cd;
+     const char *start;
+     const char *end;
+     char **resultp;
+     size_t *lengthp;
+{
+#define tmpbufsize 4096
+  size_t length;
+  char *result;
+
+  /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+  /* Set to the initial state.  */
+  iconv (cd, NULL, NULL, NULL, NULL);
+# endif
+
+  /* Determine the length we need.  */
+  {
+    size_t count = 0;
+    char tmpbuf[tmpbufsize];
+    const char *inptr = start;
+    size_t insize = end - start;
+
+    while (insize > 0)
+      {
+        char *outptr = tmpbuf;
+        size_t outsize = tmpbufsize;
+        size_t res = iconv (cd,
+                            (ICONV_CONST char **) &inptr, &insize,
+                            &outptr, &outsize);
+
+        if (res == (size_t)(-1))
+          {
+            if (errno == E2BIG)
+              /* The scratch buffer is full.  This is expected for any
+                 input whose converted form exceeds tmpbufsize bytes:
+                 account for what was produced and continue with a fresh
+                 buffer.  Treating it as an error made long strings fail.  */
+              ;
+            else if (errno == EINVAL)
+              break;
+            else
+              return -1;
+          }
+# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
+        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
+        else if (res > 0)
+          return -1;
+# endif
+        count += outptr - tmpbuf;
+      }
+    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+    {
+      /* Count the bytes needed to return to the initial shift state.  */
+      char *outptr = tmpbuf;
+      size_t outsize = tmpbufsize;
+      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
+
+      if (res == (size_t)(-1))
+        return -1;
+      count += outptr - tmpbuf;
+    }
+# endif
+    length = count;
+  }
+
+  *lengthp = length;
+  *resultp = result = xrealloc (*resultp, length);
+  if (length == 0)
+    return 0;
+
+  /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+  /* Return to the initial state.  */
+  iconv (cd, NULL, NULL, NULL, NULL);
+# endif
+
+  /* Do the conversion for real.  */
+  {
+    const char *inptr = start;
+    size_t insize = end - start;
+    char *outptr = result;
+    size_t outsize = length;
+
+    while (insize > 0)
+      {
+        size_t res = iconv (cd,
+                            (ICONV_CONST char **) &inptr, &insize,
+                            &outptr, &outsize);
+
+        if (res == (size_t)(-1))
+          {
+            /* The output buffer has exactly the size computed above, so
+               E2BIG is not expected here and stays an error.  */
+            if (errno == EINVAL)
+              break;
+            else
+              return -1;
+          }
+# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
+        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
+        else if (res > 0)
+          return -1;
+# endif
+      }
+    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+    {
+      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
+
+      if (res == (size_t)(-1))
+        return -1;
+    }
+# endif
+    /* Both passes must have produced the same number of bytes.  */
+    if (outsize != 0)
+      abort ();
+  }
+
+  return 0;
+#undef tmpbufsize
+}
+
+/* Convert the NUL terminated STRING with CD and return the freshly
+   allocated result.  The terminating NUL is converted along with the
+   text.  Any failure, or a result that does not contain exactly one NUL
+   byte (at its end), is a fatal "conversion failure".  */
+static const char *
+convert_string (cd, string)
+     iconv_t cd;
+     const char *string;
+{
+  size_t len = strlen (string) + 1;
+  char *result = NULL;
+  size_t resultlen;
+
+  if (iconv_string (cd, string, string + len, &result, &resultlen) == 0)
+    /* Verify the result has exactly one NUL byte, at the end.  */
+    if (resultlen > 0 && result[resultlen - 1] == '\0'
+        && strlen (result) == resultlen - 1)
+      return result;
+
+  error (EXIT_FAILURE, 0, _("conversion failure"));
+  /* NOTREACHED */
+  return NULL;
+}
+
+/* Replace every item of SLP by its conversion through CD.
+   SLP may be NULL, in which case nothing is done.  The previous item
+   strings are not freed here.  */
+static void
+convert_string_list (cd, slp)
+     iconv_t cd;
+     string_list_ty *slp;
+{
+  size_t i;
+
+  if (slp != NULL)
+    for (i = 0; i < slp->nitems; i++)
+      slp->item[i] = convert_string (cd, slp->item[i]);
+}
+
+/* Convert the msgstr of MP, which is a sequence of NUL terminated strings
+   of total size mp->msgstr_len, through CD in one go.
+   The result is accepted only if it ends with a NUL byte and contains as
+   many NUL terminated parts as the original; otherwise the program exits
+   with "conversion failure".  Aborts if the original msgstr is empty or
+   not NUL terminated.  */
+static void
+convert_msgstr (cd, mp)
+     iconv_t cd;
+     message_ty *mp;
+{
+  char *result = NULL;
+  size_t resultlen;
+
+  if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
+    abort ();
+
+  if (iconv_string (cd, mp->msgstr, mp->msgstr + mp->msgstr_len,
+                    &result, &resultlen) == 0)
+    /* Verify the result has a NUL byte at the end.  */
+    if (resultlen > 0 && result[resultlen - 1] == '\0')
+      /* Verify the result has the same number of NUL bytes.  */
+      {
+        const char *p;
+        const char *pend;
+        int nulcount1;
+        int nulcount2;
+
+        for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
+             p < pend;
+             p += strlen (p) + 1, nulcount1++);
+        for (p = result, pend = p + resultlen, nulcount2 = 0;
+             p < pend;
+             p += strlen (p) + 1, nulcount2++);
+
+        if (nulcount1 == nulcount2)
+          {
+            mp->msgstr = result;
+            mp->msgstr_len = resultlen;
+            return;
+          }
+      }
+
+  error (EXIT_FAILURE, 0, _("conversion failure"));
+}
+
+#endif
+
+
+/* Convert the message list MLP in place to the encoding CANON_TO_CODE,
+   which must already be a canonical charset name.
+   The source encoding is taken from the "charset=" field of the
+   non-obsolete header entries; that field is rewritten to CANON_TO_CODE.
+   A missing, non-portable or contradictory charset declaration is a fatal
+   error.  When source and target differ, the comments, the extracted
+   ("dot") comments and the msgstr of every message are converted with
+   iconv; msgids are left untouched.  An empty list is returned unchanged.
+   Without HAVE_ICONV a required conversion is a fatal error.  */
+void
+iconv_message_list (mlp, canon_to_code)
+     message_list_ty *mlp;
+     const char *canon_to_code;
+{
+  const char *canon_from_code;
+  size_t j;
+
+  /* If the list is empty, nothing to do.  */
+  if (mlp->nitems == 0)
+    return;
+
+  /* Search the header entry, and extract and replace the charset name.  */
+  canon_from_code = NULL;
+  for (j = 0; j < mlp->nitems; j++)
+    if (mlp->item[j]->msgid[0] == '\0' && mlp->item[j]->obsolete == 0)
+      {
+        const char *header = mlp->item[j]->msgstr;
+
+        if (header != NULL)
+          {
+            const char *charsetstr = strstr (header, "charset=");
+
+            if (charsetstr != NULL)
+              {
+                size_t len;
+                char *charset;
+                const char *canon_charset;
+                size_t len1, len2, len3;
+                char *new_header;
+
+                charsetstr += strlen ("charset=");
+                len = strcspn (charsetstr, " \t\n");
+                charset = (char *) alloca (len + 1);
+                memcpy (charset, charsetstr, len);
+                charset[len] = '\0';
+
+                canon_charset = po_charset_canonicalize (charset);
+                if (canon_charset == NULL)
+                  error (EXIT_FAILURE, 0,
+                         _("\
+present charset \"%s\" is not a portable encoding name"),
+                         charset);
+
+                if (canon_from_code == NULL)
+                  canon_from_code = canon_charset;
+                else if (canon_from_code != canon_charset)
+                  error (EXIT_FAILURE, 0,
+                         _("\
+two different charsets \"%s\" and \"%s\" in input file"),
+                         canon_from_code, canon_charset);
+
+                /* Build the new header from three pieces: the text up to
+                   and including "charset=" (len1 bytes), the new charset
+                   name (len2 bytes), and the text after the old name
+                   (len3 bytes plus the terminating NUL).  */
+                len1 = charsetstr - header;
+                len2 = strlen (canon_to_code);
+                len3 = (header + strlen (header)) - (charsetstr + len);
+                new_header = (char *) xmalloc (len1 + len2 + len3 + 1);
+                memcpy (new_header, header, len1);
+                memcpy (new_header + len1, canon_to_code, len2);
+                memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
+                mlp->item[j]->msgstr = new_header;
+                mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
+              }
+          }
+      }
+  if (canon_from_code == NULL)
+    error (EXIT_FAILURE, 0, _("\
+input file doesn't contain a header entry with a charset specification"));
+
+  /* If the two encodings are the same, nothing to do.  */
+  if (canon_from_code != canon_to_code)
+    {
+#if HAVE_ICONV
+      iconv_t cd;
+
+      /* Avoid glibc-2.1 bug with EUC-KR.  */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+      if (strcmp (canon_from_code, "EUC-KR") == 0)
+        cd = (iconv_t)(-1);
+      else
+# endif
+      cd = iconv_open (canon_to_code, canon_from_code);
+      if (cd == (iconv_t)(-1))
+        error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
+and iconv() does not support this conversion."),
+               canon_from_code, canon_to_code, basename (program_name));
+
+      for (j = 0; j < mlp->nitems; j++)
+        {
+          message_ty *mp = mlp->item[j];
+
+          convert_string_list (cd, mp->comment);
+          convert_string_list (cd, mp->comment_dot);
+          convert_msgstr (cd, mp);
+        }
+
+      iconv_close (cd);
+#else
+      error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
+This version was built without iconv()."),
+             canon_from_code, canon_to_code, basename (program_name));
+#endif
+    }
+}
+
+/* Convert every message list of MDLP in place to the encoding TO_CODE.
+   TO_CODE is canonicalized first; a name that cannot be canonicalized is
+   a fatal error.  Returns MDLP.  */
+msgdomain_list_ty *
+iconv_msgdomain_list (mdlp, to_code)
+     msgdomain_list_ty *mdlp;
+     const char *to_code;
+{
+  const char *canon_to_code;
+  size_t k;
+
+  /* Canonicalize target encoding.  */
+  canon_to_code = po_charset_canonicalize (to_code);
+  if (canon_to_code == NULL)
+    error (EXIT_FAILURE, 0,
+           _("target charset \"%s\" is not a portable encoding name."),
+           to_code);
+
+  for (k = 0; k < mdlp->nitems; k++)
+    iconv_message_list (mdlp->item[k]->messages, canon_to_code);
+
+  return mdlp;
+}
diff --git a/src/msgl-iconv.h b/src/msgl-iconv.h
new file mode 100644
index 000000000..7748f3d65
--- /dev/null
+++ b/src/msgl-iconv.h
@@ -0,0 +1,31 @@
+/* Message list character set conversion.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _MSGL_ICONV_H
+#define _MSGL_ICONV_H
+
+#include "message.h"
+
+extern void
+       iconv_message_list PARAMS ((message_list_ty *mlp,
+                                   const char *canon_to_code));
+extern msgdomain_list_ty *
+       iconv_msgdomain_list PARAMS ((msgdomain_list_ty *mdlp,
+                                     const char *to_code));
+
+#endif /* _MSGL_ICONV_H */
diff --git a/src/msgsed.c b/src/msgsed.c
new file mode 100644
index 000000000..fd612f8bf
--- /dev/null
+++ b/src/msgsed.c
@@ -0,0 +1,706 @@
+/* Edit translations using a script of editing commands.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_UNISTD_H +# include +#endif + +#ifdef HAVE_LIMITS_H +# include +#endif + +#include "dir-list.h" +#include "error.h" +#include "progname.h" +#include "message.h" +#include "read-po.h" +#include "write-po.h" +#include "str-list.h" +#include "msgl-charset.h" +#include "system.h" +#include "findprog.h" +#include "pipe.h" +#include "wait-process.h" +#include "libgettext.h" + +#define _(str) gettext (str) + + +/* We use a child process running 'sed', and communicate through a + bidirectional pipe. To avoid deadlocks, let the child process decide + when it wants to read or to write, and let the parent behave accordingly. + The parent uses select() to know whether it must write or read. On + platforms without select(), we use non-blocking I/O. (This means the + parent is busy looping while waiting for the child. Not good.) */ + +/* On BeOS select() works only on sockets, not on normal file descriptors. */ +#ifdef __BeOS__ +# undef HAVE_SELECT +#endif + + +/* Force output of PO file even if empty. */ +static int force_po; + +/* Arguments to be passed to the sed subprocess. 
*/
+static string_list_ty *sed_args;
+
+/* Pathname of the sed program.  */
+static const char *sed_path;
+
+/* Argument list for the sed program.  */
+static char **sed_argv;
+
+/* Long options.
+   Entries with a non-NULL flag pointer (--add-location, --no-location,
+   --force-po) store their value directly into the named variable.  The
+   other entries return a code that main() handles in its option switch.
+   --no-escape has no short equivalent and uses the code CHAR_MAX + 1.  */
+static const struct option long_options[] =
+{
+  { "add-location", no_argument, &line_comment, 1 },
+  { "directory", required_argument, NULL, 'D' },
+  { "escape", no_argument, NULL, 'E' },
+  { "expression", required_argument, NULL, 'e' },
+  { "file", required_argument, NULL, 'f' },
+  { "force-po", no_argument, &force_po, 1 },
+  { "help", no_argument, NULL, 'h' },
+  { "indent", no_argument, NULL, 'i' },
+  { "no-escape", no_argument, NULL, CHAR_MAX + 1 },
+  { "no-location", no_argument, &line_comment, 0 },
+  { "output-file", required_argument, NULL, 'o' },
+  { "quiet", no_argument, NULL, 'n' },
+  { "silent", no_argument, NULL, 'n' },
+  { "sort-by-file", no_argument, NULL, 'F' },
+  { "sort-output", no_argument, NULL, 's' },
+  { "strict", no_argument, NULL, 'S' },
+  { "version", no_argument, NULL, 'V' },
+  { "width", required_argument, NULL, 'w', },
+  { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.
*/
+static void usage PARAMS ((int status));
+#ifdef EINTR
+static inline int nonintr_close PARAMS ((int fd));
+static inline ssize_t nonintr_read PARAMS ((int fd, void *buf, size_t count));
+static inline ssize_t nonintr_write PARAMS ((int fd, const void *buf,
+                                             size_t count));
+#endif
+static void process_string PARAMS ((const char *str, size_t len,
+                                    char **resultp, size_t *lengthp));
+static void process_message PARAMS ((message_ty *mp));
+static void process_message_list PARAMS ((message_list_ty *mlp));
+static msgdomain_list_ty *
+       process_msgdomain_list PARAMS ((msgdomain_list_ty *mdlp));
+
+
+/* Entry point of msgsed.
+   Parses the command line, reads one PO file (standard input if none is
+   named), pipes every translation through 'sed' with the collected
+   -e/-f/-n arguments, optionally sorts the result, and writes it to the
+   output file.  Always terminates through exit().  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;
+  int do_help;
+  int do_version;
+  char *output_file;
+  const char *input_file;
+  msgdomain_list_ty *result;
+  int sort_by_filepos = 0;
+  int sort_by_msgid = 0;
+  size_t i;
+
+  /* Set program name for messages.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Set default values for variables.  */
+  do_help = 0;
+  do_version = 0;
+  output_file = NULL;
+  input_file = NULL;
+  sed_args = string_list_alloc ();
+
+  while ((opt = getopt_long (argc, argv, "D:e:Ef:Fhino:sVw:",
+                             long_options, NULL))
+         != EOF)
+    switch (opt)
+      {
+      case '\0':                /* Long option.  */
+        break;
+
+      case 'D':
+        dir_list_append (optarg);
+        break;
+
+      /* -e, -f and -n are not interpreted here; they are collected and
+         handed to the sed subprocess unchanged.  */
+      case 'e':
+        string_list_append (sed_args, "-e");
+        string_list_append (sed_args, optarg);
+        break;
+
+      case 'E':
+        message_print_style_escape (1);
+        break;
+
+      case 'f':
+        string_list_append (sed_args, "-f");
+        string_list_append (sed_args, optarg);
+        break;
+
+      case 'F':
+        sort_by_filepos = 1;
+        break;
+
+      case 'h':
+        do_help = 1;
+        break;
+
+      case 'i':
+        message_print_style_indent ();
+        break;
+
+      case 'n':
+        string_list_append (sed_args, "-n");
+        break;
+
+      case 'o':
+        output_file = optarg;
+        break;
+
+      case 's':
+        sort_by_msgid = 1;
+        break;
+
+      case 'S':
+        message_print_style_uniforum ();
+        break;
+
+      case 'V':
+        do_version = 1;
+        break;
+
+      case 'w':
+        {
+          int value;
+          char *endp;
+          value = strtol (optarg, &endp, 10);
+          /* An argument that does not start with a number is silently
+             ignored; trailing characters after the digits are too.  */
+          if (endp != optarg)
+            message_page_width_set (value);
+        }
+        break;
+
+      case CHAR_MAX + 1:
+        message_print_style_escape (0);
+        break;
+
+      default:
+        usage (EXIT_FAILURE);
+        break;
+      }
+
+  /* Version information is requested.  */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Test whether we have an .po file name as argument.  */
+  if (optind == argc)
+    input_file = "-";
+  else if (optind + 1 == argc)
+    input_file = argv[optind];
+  else
+    {
+      /* Status EXIT_SUCCESS makes error() print the message and return,
+         so that usage() can add its hint before exiting.  */
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
+      usage (EXIT_FAILURE);
+    }
+
+  if (sed_args->nitems == 0)
+    error (EXIT_FAILURE, 0, _("at least one sed script must be specified"));
+
+  if (sort_by_msgid && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* Read input file.  */
+  result = read_po_file (input_file);
+
+  /* Warn if the current locale is not suitable for this PO file.  */
+  compare_po_locale_charsets (result);
+
+  /* Attempt to locate the 'sed' program.
+     This is an optimization, to avoid that spawn/exec searches the PATH
+     on every call.  */
+  sed_path = find_in_path ("sed");
+
+  /* Build argument list for the 'sed' program.
+     Slot 0 is the program itself, slots 1..nitems are the collected
+     arguments, and the last slot is the terminating NULL.  */
+  sed_argv = (char **) xmalloc ((1 + sed_args->nitems + 1) * sizeof (char *));
+  sed_argv[0] = (char *) sed_path;
+  for (i = 1; i <= sed_args->nitems; i++)
+    sed_argv[i] = (char *) sed_args->item[i - 1];
+  sed_argv[i] = NULL;
+
+  /* Apply the sed script.  */
+  result = process_msgdomain_list (result);
+
+  /* Sort the results.  */
+  if (sort_by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (sort_by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Write the merged message list out.  */
+  msgdomain_list_print (result, output_file, force_po, 0);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Display usage information and exit.
   If STATUS is not EXIT_SUCCESS, only a one-line hint is written to
   standard error; otherwise the full help text is written to standard
   output.  Either way the process exits with STATUS.  */
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+             program_name);
+  else
+    {
+      /* xgettext: no-wrap */
+      printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Applies a sed script to all translations of a translation catalog.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Sed options:\n\
+ -e, --expression=SCRIPT add SCRIPT to the commands to be executed\n\
+ -f, --file=SCRIPTFILE add the contents of SCRIPTFILE to the commands\n\
+ to be executed\n\
+ -n, --quiet, --silent suppress automatic printing of pattern space\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output details:\n\
+ --no-escape do not use C escapes in output (default)\n\
+ -E, --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ -i, --indent indented output style\n\
+ --no-location suppress '#: filename:line' lines\n\
+ --add-location preserve '#: filename:line' lines (default)\n\
+ --strict strict Uniforum output style\n\
+ -w, --width=NUMBER set output page width\n\
+ -s, --sort-output generate sorted output and remove duplicates\n\
+ -F, --sort-by-file sort output by file location\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+      printf ("\n");
+      fputs (_("Report bugs to .\n"),
+             stdout);
+    }
+
+  exit (status);
+}
+
+
+#ifdef EINTR
+
+/* EINTR handling for close(), read(), write(), select().
+   These functions can return -1/EINTR even though we don't have any
+   signal handlers set up, namely when we get interrupted via SIGSTOP.
+   Each wrapper below retries the underlying call for as long as it fails
+   with EINTR and is then substituted for the original name by a macro,
+   so the rest of the file uses the retrying versions.  */
+
+static inline int
+nonintr_close (fd)
+     int fd;
+{
+  int retval;
+
+  do
+    retval = close (fd);
+  while (retval < 0 && errno == EINTR);
+
+  return retval;
+}
+#define close nonintr_close
+
+static inline ssize_t
+nonintr_read (fd, buf, count)
+     int fd;
+     void *buf;
+     size_t count;
+{
+  ssize_t retval;
+
+  do
+    retval = read (fd, buf, count);
+  while (retval < 0 && errno == EINTR);
+
+  return retval;
+}
+#define read nonintr_read
+
+static inline ssize_t
+nonintr_write (fd, buf, count)
+     int fd;
+     const void *buf;
+     size_t count;
+{
+  ssize_t retval;
+
+  do
+    retval = write (fd, buf, count);
+  while (retval < 0 && errno == EINTR);
+
+  return retval;
+}
+#define write nonintr_write
+
+# if HAVE_SELECT
+
+static inline int
+nonintr_select (n, readfds, writefds, exceptfds, timeout)
+     int n;
+     fd_set *readfds;
+     fd_set *writefds;
+     fd_set *exceptfds;
+     struct timeval *timeout;
+{
+  int retval;
+
+  do
+    retval = select (n, readfds, writefds, exceptfds, timeout);
+  while (retval < 0 && errno == EINTR);
+
+  return retval;
+}
+#define select nonintr_select
+
+# endif
+
+#endif
+
+
+/* Non-blocking I/O.
+   IS_EAGAIN(errcode) tells whether a failed read() or write() merely
+   means "try again later".  With select() the descriptors are only
+   touched when reported ready, so it is defined as 0 and every failure
+   is treated as an error.  */
+#ifndef O_NONBLOCK
+# define O_NONBLOCK O_NDELAY
+#endif
+#if HAVE_SELECT
+# define IS_EAGAIN(errcode) 0
+#else
+# ifdef EWOULDBLOCK
+#  define IS_EAGAIN(errcode) ((errcode) == EAGAIN || (errcode) == EWOULDBLOCK)
+# else
+#  define IS_EAGAIN(errcode) ((errcode) == EAGAIN)
+# endif
+#endif
+
+/* Process a string STR of size LEN bytes through sed, then remove NUL bytes.
+   Store the freshly allocated result at *RESULTP and its length at *LENGTHP.
+   The result is not NUL terminated; the caller owns it and may resize it
+   with xrealloc().
+   One sed subprocess is spawned per call.  The parent interleaves writing
+   STR to the child with reading the child's output, so that neither side
+   blocks on a full pipe.  A sed exit status other than 0, or any I/O
+   error, terminates the program.
+ */
+static void
+process_string (str, len, resultp, lengthp)
+     const char *str;
+     size_t len;
+     char **resultp;
+     size_t *lengthp;
+{
+  pid_t child;
+  int fd[2];
+  char *result;
+  size_t allocated;
+  size_t length;
+  int exitstatus;
+
+  /* Open a bidirectional pipe to a sed subprocess.
+     fd[0] is read from and fd[1] is written to below.  */
+  child = create_pipe_bidi ("sed", sed_path, sed_argv, fd);
+
+  /* Enable non-blocking I/O.  This permits the read() and write() calls
+     to return -1/EAGAIN without blocking; this is important for polling
+     if HAVE_SELECT is not defined.  It also permits the read() and write()
+     calls to return after partial reads/writes; this is important if
+     HAVE_SELECT is defined, because select() only says that some data
+     can be read or written, not how many.  Without non-blocking I/O,
+     Linux 2.2.17 and BSD systems prefer to block instead of returning
+     with partial results.  */
+  {
+    int fcntl_flags;
+
+    if ((fcntl_flags = fcntl (fd[1], F_GETFL, 0)) < 0
+        || fcntl (fd[1], F_SETFL, fcntl_flags | O_NONBLOCK) < 0
+        || (fcntl_flags = fcntl (fd[0], F_GETFL, 0)) < 0
+        || fcntl (fd[0], F_SETFL, fcntl_flags | O_NONBLOCK) < 0)
+      error (EXIT_FAILURE, errno,
+             _("cannot set up nonblocking I/O to sed subprocess"));
+  }
+
+  /* Initial guess for the output size: the input size plus 25%, and at
+     least one byte so that the growth step below makes progress.  */
+  allocated = len + (len >> 2) + 1;
+  result = (char *) xmalloc (allocated);
+  length = 0;
+
+  /* STR doubles as the state of the writing side: it becomes NULL once
+     everything has been written and fd[1] has been closed.  */
+  for (;;)
+    {
+#if HAVE_SELECT
+      int n;
+      fd_set readfds;
+      fd_set writefds;
+
+      FD_ZERO (&readfds);
+      FD_SET (fd[0], &readfds);
+      n = fd[0] + 1;
+      if (str != NULL)
+        {
+          FD_ZERO (&writefds);
+          FD_SET (fd[1], &writefds);
+          if (n <= fd[1])
+            n = fd[1] + 1;
+        }
+
+      n = select (n, &readfds, (str != NULL ? &writefds : NULL), NULL, NULL);
+      if (n < 0)
+        error (EXIT_FAILURE, errno,
+               _("communication with sed subprocess failed"));
+      if (str != NULL && FD_ISSET (fd[1], &writefds))
+        goto try_write;
+      if (FD_ISSET (fd[0], &readfds))
+        goto try_read;
+      /* How could select() return if none of the two descriptors is ready?  */
+      abort ();
+#endif
+
+      /* Attempt to write.  */
+#if HAVE_SELECT
+    try_write:
+#endif
+      if (str != NULL)
+        {
+          if (len > 0)
+            {
+              ssize_t nwritten = write (fd[1], str, len);
+              if (nwritten < 0 && !IS_EAGAIN (errno))
+                error (EXIT_FAILURE, errno,
+                       _("write to sed subprocess failed"));
+              if (nwritten > 0)
+                {
+                  str += nwritten;
+                  len -= nwritten;
+                }
+            }
+          else
+            {
+              /* Tell the child there is nothing more the parent will send.  */
+              close (fd[1]);
+              str = NULL;
+            }
+        }
+#if HAVE_SELECT
+      continue;
+#endif
+
+      /* Attempt to read.  */
+#if HAVE_SELECT
+    try_read:
+#endif
+      if (length == allocated)
+        {
+          allocated = allocated + (allocated >> 1);
+          result = xrealloc (result, allocated);
+        }
+      {
+        ssize_t nread = read (fd[0], result + length, allocated - length);
+        if (nread < 0 && !IS_EAGAIN (errno))
+          error (EXIT_FAILURE, errno, _("read from sed subprocess failed"));
+        if (nread > 0)
+          length += nread;
+        /* End of file from the child, after we have sent everything:
+           the exchange is complete.  */
+        if (nread == 0 && str == NULL)
+          break;
+      }
+#if HAVE_SELECT
+      continue;
+#endif
+    }
+
+  close (fd[0]);
+
+  /* Remove zombie process from process list.  */
+  exitstatus = wait_subprocess (child, "sed");
+  if (exitstatus != 0)
+    error (EXIT_FAILURE, 0, _("sed subprocess terminated with exit code %d"),
+           exitstatus);
+
+  /* Remove NUL bytes from result.
+     The outer loop only looks for the first NUL; from there on the inner
+     loop compacts the remaining bytes in place.  */
+  {
+    char *p = result;
+    char *pend = result + length;
+
+    for (; p < pend; p++)
+      if (*p == '\0')
+        {
+          char *q;
+
+          q = p;
+          for (; p < pend; p++)
+            if (*p != '\0')
+              *q++ = *p;
+          length = q - result;
+          break;
+        }
+  }
+
+  *resultp = result;
+  *lengthp = length;
+}
+
+
+/* Replace the translation of MP by the result of piping it through sed.
+   mp->msgstr is a sequence of NUL terminated strings (one per plural
+   form) of total size mp->msgstr_len.  Each of them is filtered
+   separately, and the results are concatenated again, each followed by
+   a NUL.  The previous msgstr storage is not freed here.  */
+static void
+process_message (mp)
+     message_ty *mp;
+{
+  const char *msgstr = mp->msgstr;
+  size_t msgstr_len = mp->msgstr_len;
+  size_t nsubstrings;
+  char **substrings;
+  size_t total_len;
+  char *total_str;
+  const char *p;
+  char *q;
+  size_t k;
+
+  /* Count NUL delimited substrings.  */
+  for (p = msgstr, nsubstrings = 0;
+       p < msgstr + msgstr_len;
+       p += strlen (p) + 1, nsubstrings++);
+
+  /* Process each NUL delimited substring separately.  */
+  substrings = (char **) xmalloc (nsubstrings * sizeof (char *));
+  for (p = msgstr, k = 0, total_len = 0; k < nsubstrings; k++)
+    {
+      char *result;
+      size_t length;
+
+      process_string (p, strlen (p), &result, &length);
+      result = xrealloc (result, length + 1);
+      result[length] = '\0';
+      substrings[k] = result;
+      total_len += length + 1;
+
+      /* Advance to the next substring.  Without this step every plural
+         form would be replaced by the filtered first form.  */
+      p += strlen (p) + 1;
+    }
+
+  /* Concatenate the results, including the NUL after each.  */
+  total_str = (char *) xmalloc (total_len);
+  for (k = 0, q = total_str; k < nsubstrings; k++)
+    {
+      /* process_string() has removed embedded NULs, so strlen() gives
+         the full length of each piece.  */
+      size_t length = strlen (substrings[k]);
+
+      memcpy (q, substrings[k], length + 1);
+      free (substrings[k]);
+      q += length + 1;
+    }
+  free (substrings);
+
+  mp->msgstr = total_str;
+  mp->msgstr_len = total_len;
+}
+
+
+/* Apply process_message() to every message of MLP, in place.
+   This includes the header entry: no message is skipped.  */
+static void
+process_message_list (mlp)
+     message_list_ty *mlp;
+{
+  size_t j;
+
+  for (j = 0; j < mlp->nitems; j++)
+    process_message (mlp->item[j]);
+}
+
+
+/* Apply process_message_list() to every domain of MDLP, in place.
+   Returns MDLP.  */
+static msgdomain_list_ty *
+process_msgdomain_list (mdlp)
+     msgdomain_list_ty *mdlp;
+{
+  size_t k;
+
+  for (k = 0; k < mdlp->nitems; k++)
+    process_message_list (mdlp->item[k]->messages);
+
+  return mdlp;
+}