New program 'msguniq'.

author Bruno Haible <bruno@clisp.org>

Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)

committer Bruno Haible <bruno@clisp.org>

Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)
author Bruno Haible <bruno@clisp.org>
Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)
committer Bruno Haible <bruno@clisp.org>
Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)
diff --git a/man/ChangeLog b/man/ChangeLog

index 348239323d5215fbd1359eff9a4522813abc00cd..e0daebdec73e8bf9c476605febbe3d659f1fee85 100644 (file)
--- a/man/ChangeLog
+++ b/man/ChangeLog
@@ -1,3 +1,11 @@
+2001-07-21  Bruno Haible  <haible@clisp.cons.org>
+
+       * msguniq.x: New file.
+       * Makefile.am (man_aux): Add it.
+       (man_MAN1): Add msguniq.1.
+       (man_HTML): Add msguniq.1.html.
+       (msguniq.1.html): New rule.
+
  2001-07-12  Bruno Haible  <haible@clisp.cons.org>
  
         * msgexec.x: New file.
diff --git a/man/Makefile.am b/man/Makefile.am

index 105b14a159454e87086665e7a7d7043384eef6da..96d3e0d09c8f3ad82b4f8ce6ba4794dc2a603f73 100644 (file)
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -28,13 +28,13 @@ AUTOMAKE_OPTIONS = 1.2 gnits
  
  man_aux  = gettext.x ngettext.x \
  msgcmp.x msgfmt.x msgmerge.x msgunfmt.x xgettext.x \
-msgcat.x msgcomm.x msgconv.x msgen.x msggrep.x msgexec.x
+msgcat.x msgcomm.x msgconv.x msgen.x msgexec.x msggrep.x msguniq.x
  
  # Likewise, plus additional manual pages for the libintl functions.
  
  man_MAN1 = gettext.1 ngettext.1 \
  msgcmp.1 msgfmt.1 msgmerge.1 msgunfmt.1 xgettext.1 \
-msgcat.1 msgcomm.1 msgconv.1 msgen.1 msggrep.1 msgexec.1
+msgcat.1 msgcomm.1 msgconv.1 msgen.1 msgexec.1 msggrep.1 msguniq.1
  man_MAN3 = gettext.3 ngettext.3 \
  textdomain.3 bindtextdomain.3 bind_textdomain_codeset.3
  man_MAN3IN = gettext.3.in ngettext.3.in \
@@ -44,7 +44,7 @@ dgettext.3 dcgettext.3 dngettext.3 dcngettext.3
  
  man_HTML = gettext.1.html ngettext.1.html \
  msgcmp.1.html msgfmt.1.html msgmerge.1.html msgunfmt.1.html xgettext.1.html \
-msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msggrep.1.html msgexec.1.html \
+msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msgexec.1.html msggrep.1.html msguniq.1.html \
  gettext.3.html ngettext.3.html \
  textdomain.3.html bindtextdomain.3.html bind_textdomain_codeset.3.html
  
@@ -148,11 +148,14 @@ msgconv.1.html: msgconv.1
  msgen.1.html: msgen.1
         $(MAN2HTML) `if test -f msgen.1; then echo .; else echo $(srcdir); fi`/msgen.1 > t-$@
         mv t-$@ $@
+msgexec.1.html: msgexec.1
+       $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@
+       mv t-$@ $@
  msggrep.1.html: msggrep.1
         $(MAN2HTML) `if test -f msggrep.1; then echo .; else echo $(srcdir); fi`/msggrep.1 > t-$@
         mv t-$@ $@
-msgexec.1.html: msgexec.1
-       $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@
+msguniq.1.html: msguniq.1
+       $(MAN2HTML) `if test -f msguniq.1; then echo .; else echo $(srcdir); fi`/msguniq.1 > t-$@
         mv t-$@ $@
  gettext.3.html: gettext.3.in
         $(MAN2HTML) $(srcdir)/gettext.3.in > t-$@
diff --git a/man/msguniq.x b/man/msguniq.x

new file mode 100644 (file)

index 0000000..79e7a46
--- /dev/null
+++ b/man/msguniq.x
@@ -0,0 +1,4 @@
+[NAME]
+msguniq \- unify duplicate translations in message catalog
+[DESCRIPTION]
+.\" Add any additional description here
diff --git a/src/ChangeLog b/src/ChangeLog

index c92ff276bfc0265e5018ffee056d82b134208765..408b3d7018565a1bcdbc48f877ce248d3b59ed6d 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,24 @@
+2001-07-21  Bruno Haible  <haible@clisp.cons.org>
+
+       * msgl-cat.h: New file.
+       * msgl-cat.c: New file, extracted from msgcat.c.
+       * msgcat.c (more_than, less_than): Move to msgl-cat.c.
+       (use_first): Likewise. Change type to bool.
+       (long_options): Don't take the address of use_first.
+       (main): Initialize more_than, less_than, use_first explicitly.
+       Add --use-first handling.
+       (is_message_selected, is_message_needed, is_message_first_needed,
+       catenate_msgdomain_list): Move to msgl-cat.c.
+       * read-po.h (allow_duplicates): New declaration.
+       * read-po.c (allow_duplicates): New variable.
+       (readall_directive_message): If allow_duplicates is true, don't search
+       for the message ID, just append the message.
+       * msguniq.c: New file.
+       * Makefile.am (bin_PROGRAMS): Add msguniq.
+       (noinst_HEADERS): Add msgl-cat.h.
+       (msgcat_SOURCES): Add msgl-cat.c.
+       (msguniq_SOURCES, msguniq_LDADD): New variables.
+
  2001-07-21  Bruno Haible  <haible@clisp.cons.org>
  
         * msgcat.c (usage): The default value for more-than is 0 here.
diff --git a/src/FILES b/src/FILES

index 740e00745404000db7a1fc08b35496258a6fdf0e..9bb458fba2d7a17c0b9fb0d90de2aae5c741067e 100644 (file)
--- a/src/FILES
+++ b/src/FILES
@@ -68,6 +68,11 @@ read-po.h
  read-po.c
                  Reading of a PO file, returning a list-of-messages.
  
+msgl-cat.h
+msgl-cat.c
+                Concatenate message lists from several files, with handling
+                of duplicate msgids.
+
  msgfmt.c        Main source for the 'msgfmt' program.
  msgcmp.c        Main source for the 'msgcmp' program.
  msgcomm.c       Main source for the 'msgcomm' program.
diff --git a/src/Makefile.am b/src/Makefile.am

index 9ac1b4e27c75d4986d57e3780a8ada3b3a3aeb69..2b02ef9a5fc7338ce09b86dc22b6c1e564bcbefc 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,11 +21,11 @@ AUTOMAKE_OPTIONS = 1.2 gnits
  
  bin_PROGRAMS = gettext ngettext \
  msgcmp msgfmt msgmerge msgunfmt xgettext \
-msgcat msgcomm msgconv msgen msggrep msgexec
+msgcat msgcomm msgconv msgen msgexec msggrep msguniq
  
  noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
  po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \
-po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h
+po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-cat.h
  
  EXTRA_DIST = FILES
  
@@ -54,7 +54,7 @@ po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
  write-po.c
  msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-iconv.c
+msgl-iconv.c msgl-cat.c
  msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c
  msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
@@ -62,12 +62,15 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
  msgl-iconv.c
  msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c
-msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
  msgl-charset.c
-msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
  msgl-charset.c
+msguniq_SOURCES = msguniq.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
+msgl-iconv.c msgl-cat.c
  
  # Link dependencies.
  # po-lex.c and po.c may need -liconv.
@@ -81,8 +84,9 @@ msgcat_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
  msgcomm_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
  msgconv_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
  msgen_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
-msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
  msgexec_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
+msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
+msguniq_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
  
  BUILT_SOURCES = po-gram-gen.c po-hash-gen.c po-gram-gen.h po-hash-gen.h
  
diff --git a/src/msgcat.c b/src/msgcat.c

index 33259f00dadb6a06dfa818fef1452c0c9cf63b5d..8ed926d4053c2a4e2e57c703f8668698f3b1be71 100644 (file)
--- a/src/msgcat.c
+++ b/src/msgcat.c
@@ -29,14 +29,13 @@
  #include <locale.h>
  
  #include "dir-list.h"
+#include "str-list.h"
  #include "error.h"
-#include "xerror.h"
  #include "progname.h"
  #include "message.h"
  #include "read-po.h"
  #include "write-po.h"
-#include "po-charset.h"
-#include "msgl-iconv.h"
+#include "msgl-cat.h"
  #include "system.h"
  #include "libgettext.h"
  
@@ -49,14 +48,6 @@ static int force_po;
  /* Target encoding.  */
  static const char *to_code;
  
-/* These variables control which messages are selected.  */
-static int more_than = 0;
-static int less_than = INT_MAX;
-
-/* If true, use the first available translation.
-   If false, merge all available translations into one and fuzzy it.  */
-static int use_first;
-
  /* Long options.  */
  static const struct option long_options[] =
  {
@@ -75,7 +66,7 @@ static const struct option long_options[] =
    { "strict", no_argument, NULL, 'S' },
    { "to-code", required_argument, NULL, 't' },
    { "unique", no_argument, NULL, 'u' },
-  { "use-first", no_argument, &use_first, 1 },
+  { "use-first", no_argument, NULL, CHAR_MAX + 1 },
    { "version", no_argument, NULL, 'V' },
    { "width", required_argument, NULL, 'w', },
    { "more-than", required_argument, NULL, '>', },
@@ -87,12 +78,6 @@ static const struct option long_options[] =
  /* Prototypes for local functions.  */
  static void usage PARAMS ((int status));
  static string_list_ty *read_name_from_file PARAMS ((const char *file_name));
-static bool is_message_selected PARAMS ((const message_ty *tmp));
-static bool is_message_needed PARAMS ((const message_ty *tmp));
-static bool is_message_first_needed PARAMS ((const message_ty *tmp));
-static msgdomain_list_ty *
-       catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
-                                       const char *to_code));
  
  
  int
@@ -129,6 +114,9 @@ main (argc, argv)
    do_version = false;
    output_file = NULL;
    files_from = NULL;
+  more_than = 0;
+  less_than = INT_MAX;
+  use_first = false;
  
    while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:",
                                  long_options, NULL)) != EOF)
@@ -223,6 +211,10 @@ main (argc, argv)
         }
         break;
  
+      case CHAR_MAX + 1:
+       use_first = true;
+       break;
+
        default:
         usage (EXIT_FAILURE);
         /* NOTREACHED */
@@ -440,532 +432,3 @@ read_name_from_file (file_name)
  
    return result;
  }
-
-
-static bool
-is_message_selected (tmp)
-     const message_ty *tmp;
-{
-  int used = (tmp->used >= 0 ? tmp->used : - tmp->used);
-
-  return (tmp->msgid[0] == '\0') /* keep the header entry */
-        || (used > more_than && used < less_than);
-}
-
-
-static bool
-is_message_needed (mp)
-     const message_ty *mp;
-{
-  if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0')
-    /* Weak translation.  Needed if there are only weak translations.  */
-    return mp->tmp->used < 0 && is_message_selected (mp->tmp);
-  else
-    /* Good translation.  */
-    return is_message_selected (mp->tmp);
-}
-
-
-/* The use_first logic.  */
-static bool
-is_message_first_needed (mp)
-     const message_ty *mp;
-{
-  if (mp->tmp->obsolete && is_message_needed (mp))
-    {
-      mp->tmp->obsolete = false;
-      return true;
-    }
-  else
-    return false;
-}
-
-
-static msgdomain_list_ty *
-catenate_msgdomain_list (file_list, to_code)
-     string_list_ty *file_list;
-     const char *to_code;
-{
-  const char * const *files = file_list->item;
-  size_t nfiles = file_list->nitems;
-  msgdomain_list_ty **mdlps;
-  const char ***canon_charsets;
-  const char ***identifications;
-  msgdomain_list_ty *total_mdlp;
-  const char *canon_to_code;
-  size_t n, j, k;
-
-  /* Read input files.  */
-  mdlps =
-    (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
-  for (n = 0; n < nfiles; n++)
-    mdlps[n] = read_po_file (files[n]);
-
-  /* Determine the canonical name of each input file's encoding.  */
-  canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
-  for (n = 0; n < nfiles; n++)
-    {
-      msgdomain_list_ty *mdlp = mdlps[n];
-      size_t k;
-
-      canon_charsets[n] =
-       (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
-      for (k = 0; k < mdlp->nitems; k++)
-       {
-         message_list_ty *mlp = mdlp->item[k]->messages;
-         const char *canon_from_code = NULL;
-
-         if (mlp->nitems > 0)
-           {
-             for (j = 0; j < mlp->nitems; j++)
-               if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
-                 {
-                   const char *header = mlp->item[j]->msgstr;
-
-                   if (header != NULL)
-                     {
-                       const char *charsetstr = strstr (header, "charset=");
-
-                       if (charsetstr != NULL)
-                         {
-                           size_t len;
-                           char *charset;
-                           const char *canon_charset;
-
-                           charsetstr += strlen ("charset=");
-                           len = strcspn (charsetstr, " \t\n");
-                           charset = (char *) alloca (len + 1);
-                           memcpy (charset, charsetstr, len);
-                           charset[len] = '\0';
-
-                           canon_charset = po_charset_canonicalize (charset);
-                           if (canon_charset == NULL)
-                             error (EXIT_FAILURE, 0,
-                                    _("\
-present charset \"%s\" is not a portable encoding name"),
-                                    charset);
-
-                           if (canon_from_code == NULL)
-                             canon_from_code = canon_charset;
-                           else if (canon_from_code != canon_charset)
-                             error (EXIT_FAILURE, 0,
-                                    _("\
-two different charsets \"%s\" and \"%s\" in input file"),
-                                    canon_from_code, canon_charset);
-                         }
-                     }
-                 }
-             if (canon_from_code == NULL)
-               {
-                 if (k == 0)
-                   error (EXIT_FAILURE, 0, _("\
-input file `%s' doesn't contain a header entry with a charset specification"),
-                          files[n]);
-                 else
-                   error (EXIT_FAILURE, 0, _("\
-domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
-                          mdlp->item[k]->domain, files[n]);
-               }
-           }
-         canon_charsets[n][k] = canon_from_code;
-       }
-    }
-
-  /* Determine textual identifications of each file/domain combination.  */
-  identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
-  for (n = 0; n < nfiles; n++)
-    {
-      const char *filename = basename (files[n]);
-      msgdomain_list_ty *mdlp = mdlps[n];
-      size_t k;
-
-      identifications[n] =
-       (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
-      for (k = 0; k < mdlp->nitems; k++)
-       {
-         const char *domain = mdlp->item[k]->domain;
-         message_list_ty *mlp = mdlp->item[k]->messages;
-         char *project_id = NULL;
-
-         for (j = 0; j < mlp->nitems; j++)
-           if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
-             {
-               const char *header = mlp->item[j]->msgstr;
-
-               if (header != NULL)
-                 {
-                   const char *cp = strstr (header, "Project-Id-Version:");
-
-                   if (cp != NULL)
-                     {
-                       const char *endp;
-
-                       cp += sizeof ("Project-Id-Version:") - 1;
-
-                       endp = strchr (cp, '\n');
-                       if (endp == NULL)
-                         endp = cp + strlen (cp);
-
-                       while (cp < endp && *cp == ' ')
-                         cp++;
-
-                       if (cp < endp)
-                         {
-                           size_t len = endp - cp;
-                           project_id = (char *) xmalloc (len + 1);
-                           memcpy (project_id, cp, len);
-                           project_id[len] = '\0';
-                         }
-                       break;
-                     }
-                 }
-             }
-
-         identifications[n][k] =
-           (project_id != NULL
-            ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
-                     : xasprintf ("%s (%s)", filename, project_id))
-            : (k > 0 ? xasprintf ("%s:%s", filename, domain)
-                     : xasprintf ("%s", filename)));
-       }
-    }
-
-  /* Create list of resulting messages, but don't fill it.  Only count
-     the number of translations for each message.
-     If for a message, there is at least one non-fuzzy, non-empty translation,
-     use only the non-fuzzy, non-empty translations.  Otherwise use the
-     fuzzy or empty translations as well.  */
-  total_mdlp = msgdomain_list_alloc ();
-  for (n = 0; n < nfiles; n++)
-    {
-      msgdomain_list_ty *mdlp = mdlps[n];
-
-      for (k = 0; k < mdlp->nitems; k++)
-       {
-         const char *domain = mdlp->item[k]->domain;
-         message_list_ty *mlp = mdlp->item[k]->messages;
-         message_list_ty *total_mlp;
-
-         total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
-
-         for (j = 0; j < mlp->nitems; j++)
-           {
-             message_ty *mp = mlp->item[j];
-             message_ty *tmp;
-
-             tmp = message_list_search (total_mlp, mp->msgid);
-             if (tmp == NULL)
-               {
-                 tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
-                                      &mp->pos);
-                 tmp->is_fuzzy = true; /* may be set to false later */
-                 tmp->is_c_format = undecided; /* may be set to yes/no later */
-                 tmp->do_wrap = yes; /* may be set to no later */
-                 tmp->obsolete = true; /* may be set to false later */
-                 tmp->alternative_count = 0;
-                 tmp->alternative = NULL;
-                 message_list_append (total_mlp, tmp);
-               }
-
-             if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
-                 || mp->msgstr[0] == '\0')
-               /* Weak translation.  Counted as negative tmp->used.  */
-               {
-                 if (tmp->used <= 0)
-                   tmp->used--;
-               }
-             else
-               /* Good translation.  Counted as positive tmp->used.  */
-               {
-                 if (tmp->used < 0)
-                   tmp->used = 0;
-                 tmp->used++;
-               }
-             mp->tmp = tmp;
-           }
-       }
-    }
-
-  /* Remove messages that are not used and need not be converted.  */
-  for (n = 0; n < nfiles; n++)
-    {
-      msgdomain_list_ty *mdlp = mdlps[n];
-
-      for (k = 0; k < mdlp->nitems; k++)
-       {
-         message_list_ty *mlp = mdlp->item[k]->messages;
-
-         message_list_remove_if_not (mlp,
-                                     use_first
-                                     ? is_message_first_needed
-                                     : is_message_needed);
-
-         /* If no messages are remaining, drop the charset.  */
-         if (mlp->nitems == 0)
-           canon_charsets[n][k] = NULL;
-       }
-    }
-  for (k = 0; k < total_mdlp->nitems; k++)
-    {
-      message_list_ty *mlp = total_mdlp->item[k]->messages;
-
-      message_list_remove_if_not (mlp, is_message_selected);
-    }
-
-  /* Determine the target encoding for the remaining messages.  */
-  if (to_code != NULL)
-    {
-      /* Canonicalize target encoding.  */
-      canon_to_code = po_charset_canonicalize (to_code);
-      if (canon_to_code == NULL)
-       error (EXIT_FAILURE, 0,
-              _("target charset \"%s\" is not a portable encoding name."),
-              to_code);
-    }
-  else
-    {
-      /* No target encoding was specified.  Test whether the messages are
-        all in a single encoding.  If so, conversion is not needed.  */
-      const char *first = NULL;
-      const char *second = NULL;
-      bool with_UTF8 = false;
-
-      for (n = 0; n < nfiles; n++)
-       {
-         msgdomain_list_ty *mdlp = mdlps[n];
-
-         for (k = 0; k < mdlp->nitems; k++)
-           if (canon_charsets[n][k] != NULL)
-             {
-               if (first == NULL)
-                 first = canon_charsets[n][k];
-               else if (canon_charsets[n][k] != first && second == NULL)
-                 second = canon_charsets[n][k];
-
-               if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
-                 with_UTF8 = true;
-             }
-       }
-
-      if (second != NULL)
-       {
-         /* A conversion is needed.  Warn the user since he hasn't asked
-            for it and might be surprised.  */
-         if (with_UTF8)
-           multiline_warning (xasprintf (_("warning: ")),
-                              xasprintf (_("\
-Input files contain messages in different encodings, UTF-8 among others.\n\
-Converting the output to UTF-8.\n\
-")));
-         else
-           multiline_warning (xasprintf (_("warning: ")),
-                              xasprintf (_("\
-Input files contain messages in different encodings, %s and %s among others.\n\
-Converting the output to UTF-8.\n\
-To select a different output encoding, use the --to-code option.\n\
-"), first, second));
-         canon_to_code = po_charset_canonicalize ("UTF-8");
-       }
-      else
-       {
-         /* No conversion needed.  */
-         canon_to_code = NULL;
-       }
-    }
-
-  /* Now convert the remaining messages to to_code.  */
-  if (canon_to_code != NULL)
-    for (n = 0; n < nfiles; n++)
-      {
-       msgdomain_list_ty *mdlp = mdlps[n];
-
-       for (k = 0; k < mdlp->nitems; k++)
-         if (canon_charsets[n][k] != NULL)
-           iconv_message_list (mdlp->item[k]->messages, canon_to_code);
-      }
-
-  /* Fill the resulting messages.  */
-  for (n = 0; n < nfiles; n++)
-    {
-      msgdomain_list_ty *mdlp = mdlps[n];
-
-      for (k = 0; k < mdlp->nitems; k++)
-       {
-         message_list_ty *mlp = mdlp->item[k]->messages;
-
-         for (j = 0; j < mlp->nitems; j++)
-           {
-             message_ty *mp = mlp->item[j];
-             message_ty *tmp = mp->tmp;
-             size_t i;
-
-             /* No need to discard unneeded weak translations here;
-                they have already been filtered out above.  */
-             if (use_first || tmp->used == 1 || tmp->used == -1)
-               {
-                 /* Copy mp, as only message, into tmp.  */
-                 tmp->msgstr = mp->msgstr;
-                 tmp->msgstr_len = mp->msgstr_len;
-                 tmp->pos = mp->pos;
-                 if (mp->comment)
-                   for (i = 0; i < mp->comment->nitems; i++)
-                     message_comment_append (tmp, mp->comment->item[i]);
-                 if (mp->comment_dot)
-                   for (i = 0; i < mp->comment_dot->nitems; i++)
-                     message_comment_dot_append (tmp,
-                                                 mp->comment_dot->item[i]);
-                 for (i = 0; i < mp->filepos_count; i++)
-                   message_comment_filepos (tmp, mp->filepos[i].file_name,
-                                            mp->filepos[i].line_number);
-                 tmp->is_fuzzy = mp->is_fuzzy;
-                 tmp->is_c_format = mp->is_c_format;
-                 tmp->do_wrap = mp->do_wrap;
-                 tmp->obsolete = mp->obsolete;
-               }
-             else
-               {
-                 /* Copy mp, among others, into tmp.  */
-                 char *id = xasprintf ("#-#-#-#-#  %s  #-#-#-#-#",
-                                       identifications[n][k]);
-                 size_t nbytes;
-
-                 if (tmp->alternative_count == 0)
-                   tmp->pos = mp->pos;
-
-                 i = tmp->alternative_count;
-                 nbytes = (i + 1) * sizeof (struct altstr);
-                 tmp->alternative = xrealloc (tmp->alternative, nbytes);
-                 tmp->alternative[i].msgstr = mp->msgstr;
-                 tmp->alternative[i].msgstr_len = mp->msgstr_len;
-                 tmp->alternative[i].msgstr_end =
-                   tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
-                 tmp->alternative[i].id = id;
-                 tmp->alternative_count = i + 1;
-
-                 if (mp->comment)
-                   {
-                     message_comment_append (tmp, id);
-                     for (i = 0; i < mp->comment->nitems; i++)
-                       message_comment_append (tmp, mp->comment->item[i]);
-                   }
-                 if (mp->comment_dot)
-                   {
-                     message_comment_dot_append (tmp, id);
-                     for (i = 0; i < mp->comment_dot->nitems; i++)
-                       message_comment_dot_append (tmp,
-                                                   mp->comment_dot->item[i]);
-                   }
-                 for (i = 0; i < mp->filepos_count; i++)
-                   message_comment_filepos (tmp, mp->filepos[i].file_name,
-                                            mp->filepos[i].line_number);
-                 if (!mp->is_fuzzy)
-                   tmp->is_fuzzy = false;
-                 if (mp->is_c_format == yes)
-                   tmp->is_c_format = yes;
-                 else if (mp->is_c_format == no
-                          && tmp->is_c_format == undecided)
-                   tmp->is_c_format = no;
-                 if (mp->do_wrap == no)
-                   tmp->do_wrap = no;
-                 if (!mp->obsolete)
-                   tmp->obsolete = false;
-               }
-           }
-       }
-    }
-  for (k = 0; k < total_mdlp->nitems; k++)
-    {
-      message_list_ty *mlp = total_mdlp->item[k]->messages;
-
-      for (j = 0; j < mlp->nitems; j++)
-       {
-         message_ty *tmp = mlp->item[j];
-
-         if (tmp->alternative_count > 0)
-           {
-             /* Test whether all alternative translations are equal.  */
-             struct altstr *first = &tmp->alternative[0];
-             size_t i;
-
-             for (i = 0; i < tmp->alternative_count; i++)
-               if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
-                     && memcmp (tmp->alternative[i].msgstr, first->msgstr,
-                                first->msgstr_len) == 0))
-                 break;
-
-             if (i == tmp->alternative_count)
-               {
-                 /* All alternatives are equal.  */
-                 tmp->msgstr = first->msgstr;
-                 tmp->msgstr_len = first->msgstr_len;
-               }
-             else
-               {
-                 /* Concatenate the alternative msgstrs into a single one,
-                    separated by markers.  */
-                 size_t len;
-                 const char *p;
-                 const char *p_end;
-                 char *new_msgstr;
-                 char *np;
-
-                 len = 0;
-                 for (i = 0; i < tmp->alternative_count; i++)
-                   {
-                     size_t id_len = strlen (tmp->alternative[i].id);
-
-                     len += tmp->alternative[i].msgstr_len;
-
-                     p = tmp->alternative[i].msgstr;
-                     p_end = tmp->alternative[i].msgstr_end;
-                     for (; p < p_end; p += strlen (p) + 1)
-                       len += id_len + 2;
-                   }
-
-                 new_msgstr = (char *) xmalloc (len);
-                 np = new_msgstr;
-                 for (;;)
-                   {
-                     /* Test whether there's one more plural form to
-                        process.  */
-                     for (i = 0; i < tmp->alternative_count; i++)
-                       if (tmp->alternative[i].msgstr
-                           < tmp->alternative[i].msgstr_end)
-                         break;
-                     if (i == tmp->alternative_count)
-                       break;
-
-                     /* Process next plural form.  */
-                     for (i = 0; i < tmp->alternative_count; i++)
-                       if (tmp->alternative[i].msgstr
-                           < tmp->alternative[i].msgstr_end)
-                         {
-                           if (np > new_msgstr && np[-1] != '\0'
-                               && np[-1] != '\n')
-                             *np++ = '\n';
-
-                           len = strlen (tmp->alternative[i].id);
-                           memcpy (np, tmp->alternative[i].id, len);
-                           np += len;
-                           *np++ = '\n';
-
-                           len = strlen (tmp->alternative[i].msgstr);
-                           memcpy (np, tmp->alternative[i].msgstr, len);
-                           np += len;
-                           tmp->alternative[i].msgstr += len + 1;
-                         }
-
-                     /* Plural forms are separated by NUL bytes.  */
-                     *np++ = '\0';
-                   }
-                 tmp->msgstr = new_msgstr;
-                 tmp->msgstr_len = np - new_msgstr;
-
-                 tmp->is_fuzzy = true;
-               }
-           }
-       }
-    }
-
-  return total_mdlp;
-}
diff --git a/src/msgl-cat.c b/src/msgl-cat.c

new file mode 100644 (file)

index 0000000..7324e43
--- /dev/null
+++ b/src/msgl-cat.c
@@ -0,0 +1,580 @@
+/* Message list concatenation and duplicate handling.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "msgl-cat.h"
+#include "error.h"
+#include "xerror.h"
+#include "message.h"
+#include "read-po.h"
+#include "po-charset.h"
+#include "msgl-iconv.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* These variables control which messages are selected.  */
+int more_than;
+int less_than;
+
+/* If true, use the first available translation.
+   If false, merge all available translations into one and fuzzy it.  */
+bool use_first;
+
+
+/* Prototypes for local functions.  */
+static bool is_message_selected PARAMS ((const message_ty *tmp));
+static bool is_message_needed PARAMS ((const message_ty *tmp));
+static bool is_message_first_needed PARAMS ((const message_ty *tmp));
+
+
+/* Return true if the merged message TMP belongs in the output: either it
+   is the header entry (empty msgid), or the magnitude of its occurrence
+   count tmp->used lies strictly between more_than and less_than.  */
+static bool
+is_message_selected (tmp)
+     const message_ty *tmp;
+{
+  /* tmp->used is kept negative while only weak translations have been
+     counted; in either case its magnitude is the number of occurrences.  */
+  int used = (tmp->used >= 0 ? tmp->used : - tmp->used);
+
+  return (tmp->msgid[0] == '\0') /* keep the header entry */
+        || (used > more_than && used < less_than);
+}
+
+
+/* Return true if the input message MP has to be kept for the merge step.
+   mp->tmp is the merged message that MP was counted into.  A weak message
+   (fuzzy and not the header entry, or with an empty msgstr) is kept only
+   if the merged message's count is negative, i.e. no good translation was
+   counted for it.  A good translation is kept whenever the merged message
+   is selected.  */
+static bool
+is_message_needed (mp)
+     const message_ty *mp;
+{
+  if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0')
+    /* Weak translation.  Needed if there are only weak translations.  */
+    return mp->tmp->used < 0 && is_message_selected (mp->tmp);
+  else
+    /* Good translation.  */
+    return is_message_selected (mp->tmp);
+}
+
+
+/* The use_first logic.
+   Return true on the first call for which MP is needed and its merged
+   message mp->tmp is still marked obsolete; return false otherwise.
+   As a side effect the merged message's 'obsolete' flag is cleared on
+   that first call, so that later messages mapping to the same merged
+   message are rejected.  */
+static bool
+is_message_first_needed (mp)
+     const message_ty *mp;
+{
+  if (mp->tmp->obsolete && is_message_needed (mp))
+    {
+      /* Mark the merged message as claimed.  */
+      mp->tmp->obsolete = false;
+      return true;
+    }
+  else
+    return false;
+}
+
+
+/* Read all PO files named in FILE_LIST and combine their messages, domain
+   by domain, into a single newly allocated msgdomain list, which is
+   returned.
+   The work is done in phases:
+     1. read every file;
+     2. determine the charset declared in each file/domain header;
+     3. build a textual identification for each file/domain;
+     4. create the merged messages and count how often each msgid occurs;
+     5. drop messages rejected by more_than/less_than/use_first;
+     6. choose the target encoding (TO_CODE, or UTF-8 if TO_CODE is NULL
+        and the remaining inputs disagree) and convert if necessary;
+     7. copy or accumulate the translations into the merged messages;
+     8. join differing alternative translations into one fuzzy msgstr,
+        separated by identification markers.
+   Problems with charsets are reported through error (EXIT_FAILURE, ...).  */
+msgdomain_list_ty *
+catenate_msgdomain_list (file_list, to_code)
+     string_list_ty *file_list;
+     const char *to_code;
+{
+  const char * const *files = file_list->item;
+  size_t nfiles = file_list->nitems;
+  msgdomain_list_ty **mdlps;
+  const char ***canon_charsets;
+  const char ***identifications;
+  msgdomain_list_ty *total_mdlp;
+  const char *canon_to_code;
+  size_t n, j, k;
+
+  /* Read input files.  */
+  mdlps =
+    (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
+  for (n = 0; n < nfiles; n++)
+    mdlps[n] = read_po_file (files[n]);
+
+  /* Determine the canonical name of each input file's encoding.  */
+  canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+      size_t k;
+
+      canon_charsets[n] =
+       (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+      for (k = 0; k < mdlp->nitems; k++)
+       {
+         message_list_ty *mlp = mdlp->item[k]->messages;
+         const char *canon_from_code = NULL;
+
+         if (mlp->nitems > 0)
+           {
+             /* Look at every non-obsolete header entry (empty msgid).  */
+             for (j = 0; j < mlp->nitems; j++)
+               if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+                 {
+                   const char *header = mlp->item[j]->msgstr;
+
+                   if (header != NULL)
+                     {
+                       const char *charsetstr = strstr (header, "charset=");
+
+                       if (charsetstr != NULL)
+                         {
+                           size_t len;
+                           char *charset;
+                           const char *canon_charset;
+
+                           /* Extract the charset name, which ends at the
+                              first blank, tab or newline.  */
+                           charsetstr += strlen ("charset=");
+                           len = strcspn (charsetstr, " \t\n");
+                           charset = (char *) alloca (len + 1);
+                           memcpy (charset, charsetstr, len);
+                           charset[len] = '\0';
+
+                           canon_charset = po_charset_canonicalize (charset);
+                           if (canon_charset == NULL)
+                             error (EXIT_FAILURE, 0,
+                                    _("\
+present charset \"%s\" is not a portable encoding name"),
+                                    charset);
+
+                           /* NOTE(review): the charsets are compared by
+                              pointer, which presumably relies on
+                              po_charset_canonicalize returning one shared
+                              string per charset - confirm.  */
+                           if (canon_from_code == NULL)
+                             canon_from_code = canon_charset;
+                           else if (canon_from_code != canon_charset)
+                             error (EXIT_FAILURE, 0,
+                                    _("\
+two different charsets \"%s\" and \"%s\" in input file"),
+                                    canon_from_code, canon_charset);
+                         }
+                     }
+                 }
+             if (canon_from_code == NULL)
+               {
+                 if (k == 0)
+                   error (EXIT_FAILURE, 0, _("\
+input file `%s' doesn't contain a header entry with a charset specification"),
+                          files[n]);
+                 else
+                   error (EXIT_FAILURE, 0, _("\
+domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
+                          mdlp->item[k]->domain, files[n]);
+               }
+           }
+         canon_charsets[n][k] = canon_from_code;
+       }
+    }
+
+  /* Determine textual identifications of each file/domain combination.  */
+  identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
+  for (n = 0; n < nfiles; n++)
+    {
+      const char *filename = basename (files[n]);
+      msgdomain_list_ty *mdlp = mdlps[n];
+      size_t k;
+
+      identifications[n] =
+       (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+      for (k = 0; k < mdlp->nitems; k++)
+       {
+         const char *domain = mdlp->item[k]->domain;
+         message_list_ty *mlp = mdlp->item[k]->messages;
+         char *project_id = NULL;
+
+         /* Take the Project-Id-Version value from the first non-obsolete
+            header entry that has such a field.  */
+         for (j = 0; j < mlp->nitems; j++)
+           if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+             {
+               const char *header = mlp->item[j]->msgstr;
+
+               if (header != NULL)
+                 {
+                   const char *cp = strstr (header, "Project-Id-Version:");
+
+                   if (cp != NULL)
+                     {
+                       const char *endp;
+
+                       cp += sizeof ("Project-Id-Version:") - 1;
+
+                       /* The value extends to the end of the line.  */
+                       endp = strchr (cp, '\n');
+                       if (endp == NULL)
+                         endp = cp + strlen (cp);
+
+                       /* Skip leading spaces of the value.  */
+                       while (cp < endp && *cp == ' ')
+                         cp++;
+
+                       if (cp < endp)
+                         {
+                           size_t len = endp - cp;
+                           project_id = (char *) xmalloc (len + 1);
+                           memcpy (project_id, cp, len);
+                           project_id[len] = '\0';
+                         }
+                       break;
+                     }
+                 }
+             }
+
+         /* The domain name is included only for domains after the first
+            one of a file.  */
+         identifications[n][k] =
+           (project_id != NULL
+            ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
+                     : xasprintf ("%s (%s)", filename, project_id))
+            : (k > 0 ? xasprintf ("%s:%s", filename, domain)
+                     : xasprintf ("%s", filename)));
+       }
+    }
+
+  /* Create list of resulting messages, but don't fill it.  Only count
+     the number of translations for each message.
+     If for a message, there is at least one non-fuzzy, non-empty translation,
+     use only the non-fuzzy, non-empty translations.  Otherwise use the
+     fuzzy or empty translations as well.  */
+  total_mdlp = msgdomain_list_alloc ();
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+       {
+         const char *domain = mdlp->item[k]->domain;
+         message_list_ty *mlp = mdlp->item[k]->messages;
+         message_list_ty *total_mlp;
+
+         total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
+
+         for (j = 0; j < mlp->nitems; j++)
+           {
+             message_ty *mp = mlp->item[j];
+             message_ty *tmp;
+
+             tmp = message_list_search (total_mlp, mp->msgid);
+             if (tmp == NULL)
+               {
+                 tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
+                                      &mp->pos);
+                 tmp->is_fuzzy = true; /* may be set to false later */
+                 tmp->is_c_format = undecided; /* may be set to yes/no later */
+                 tmp->do_wrap = yes; /* may be set to no later */
+                 tmp->obsolete = true; /* may be set to false later */
+                 tmp->alternative_count = 0;
+                 tmp->alternative = NULL;
+                 message_list_append (total_mlp, tmp);
+               }
+
+             if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
+                 || mp->msgstr[0] == '\0')
+               /* Weak translation.  Counted as negative tmp->used.  */
+               {
+                 if (tmp->used <= 0)
+                   tmp->used--;
+               }
+             else
+               /* Good translation.  Counted as positive tmp->used.  */
+               {
+                 if (tmp->used < 0)
+                   tmp->used = 0;
+                 tmp->used++;
+               }
+             /* Remember which merged message this input message maps to.  */
+             mp->tmp = tmp;
+           }
+       }
+    }
+
+  /* Remove messages that are not used and need not be converted.  */
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+       {
+         message_list_ty *mlp = mdlp->item[k]->messages;
+
+         message_list_remove_if_not (mlp,
+                                     use_first
+                                     ? is_message_first_needed
+                                     : is_message_needed);
+
+         /* If no messages are remaining, drop the charset.  */
+         if (mlp->nitems == 0)
+           canon_charsets[n][k] = NULL;
+       }
+    }
+  for (k = 0; k < total_mdlp->nitems; k++)
+    {
+      message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+      message_list_remove_if_not (mlp, is_message_selected);
+    }
+
+  /* Determine the target encoding for the remaining messages.  */
+  if (to_code != NULL)
+    {
+      /* Canonicalize target encoding.  */
+      canon_to_code = po_charset_canonicalize (to_code);
+      if (canon_to_code == NULL)
+       error (EXIT_FAILURE, 0,
+              _("target charset \"%s\" is not a portable encoding name."),
+              to_code);
+    }
+  else
+    {
+      /* No target encoding was specified.  Test whether the messages are
+        all in a single encoding.  If so, conversion is not needed.  */
+      const char *first = NULL;
+      const char *second = NULL;
+      bool with_UTF8 = false;
+
+      /* Collect the first two distinct charsets still in use, and note
+         whether UTF-8 is among the charsets in use.  */
+      for (n = 0; n < nfiles; n++)
+       {
+         msgdomain_list_ty *mdlp = mdlps[n];
+
+         for (k = 0; k < mdlp->nitems; k++)
+           if (canon_charsets[n][k] != NULL)
+             {
+               if (first == NULL)
+                 first = canon_charsets[n][k];
+               else if (canon_charsets[n][k] != first && second == NULL)
+                 second = canon_charsets[n][k];
+
+               if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
+                 with_UTF8 = true;
+             }
+       }
+
+      if (second != NULL)
+       {
+         /* A conversion is needed.  Warn the user since he hasn't asked
+            for it and might be surprised.  */
+         if (with_UTF8)
+           multiline_warning (xasprintf (_("warning: ")),
+                              xasprintf (_("\
+Input files contain messages in different encodings, UTF-8 among others.\n\
+Converting the output to UTF-8.\n\
+")));
+         else
+           multiline_warning (xasprintf (_("warning: ")),
+                              xasprintf (_("\
+Input files contain messages in different encodings, %s and %s among others.\n\
+Converting the output to UTF-8.\n\
+To select a different output encoding, use the --to-code option.\n\
+"), first, second));
+         canon_to_code = po_charset_canonicalize ("UTF-8");
+       }
+      else
+       {
+         /* No conversion needed.  */
+         canon_to_code = NULL;
+       }
+    }
+
+  /* Now convert the remaining messages to to_code.  */
+  if (canon_to_code != NULL)
+    for (n = 0; n < nfiles; n++)
+      {
+       msgdomain_list_ty *mdlp = mdlps[n];
+
+       for (k = 0; k < mdlp->nitems; k++)
+         if (canon_charsets[n][k] != NULL)
+           iconv_message_list (mdlp->item[k]->messages, canon_to_code);
+      }
+
+  /* Fill the resulting messages.  */
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+       {
+         message_list_ty *mlp = mdlp->item[k]->messages;
+
+         for (j = 0; j < mlp->nitems; j++)
+           {
+             message_ty *mp = mlp->item[j];
+             message_ty *tmp = mp->tmp;
+             size_t i;
+
+             /* No need to discard unneeded weak translations here;
+                they have already been filtered out above.  */
+             if (use_first || tmp->used == 1 || tmp->used == -1)
+               {
+                 /* Copy mp, as only message, into tmp.  */
+                 tmp->msgstr = mp->msgstr;
+                 tmp->msgstr_len = mp->msgstr_len;
+                 tmp->pos = mp->pos;
+                 if (mp->comment)
+                   for (i = 0; i < mp->comment->nitems; i++)
+                     message_comment_append (tmp, mp->comment->item[i]);
+                 if (mp->comment_dot)
+                   for (i = 0; i < mp->comment_dot->nitems; i++)
+                     message_comment_dot_append (tmp,
+                                                 mp->comment_dot->item[i]);
+                 for (i = 0; i < mp->filepos_count; i++)
+                   message_comment_filepos (tmp, mp->filepos[i].file_name,
+                                            mp->filepos[i].line_number);
+                 tmp->is_fuzzy = mp->is_fuzzy;
+                 tmp->is_c_format = mp->is_c_format;
+                 tmp->do_wrap = mp->do_wrap;
+                 tmp->obsolete = mp->obsolete;
+               }
+             else
+               {
+                 /* Copy mp, among others, into tmp.  */
+                 char *id = xasprintf ("#-#-#-#-#  %s  #-#-#-#-#",
+                                       identifications[n][k]);
+                 size_t nbytes;
+
+                 /* The first contributor determines the position.  */
+                 if (tmp->alternative_count == 0)
+                   tmp->pos = mp->pos;
+
+                 /* Append mp's translation as one more alternative.  */
+                 i = tmp->alternative_count;
+                 nbytes = (i + 1) * sizeof (struct altstr);
+                 tmp->alternative = xrealloc (tmp->alternative, nbytes);
+                 tmp->alternative[i].msgstr = mp->msgstr;
+                 tmp->alternative[i].msgstr_len = mp->msgstr_len;
+                 tmp->alternative[i].msgstr_end =
+                   tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
+                 tmp->alternative[i].id = id;
+                 tmp->alternative_count = i + 1;
+
+                 /* Comments are cumulated, each group preceded by the
+                    identification marker of its origin.  */
+                 if (mp->comment)
+                   {
+                     message_comment_append (tmp, id);
+                     for (i = 0; i < mp->comment->nitems; i++)
+                       message_comment_append (tmp, mp->comment->item[i]);
+                   }
+                 if (mp->comment_dot)
+                   {
+                     message_comment_dot_append (tmp, id);
+                     for (i = 0; i < mp->comment_dot->nitems; i++)
+                       message_comment_dot_append (tmp,
+                                                   mp->comment_dot->item[i]);
+                   }
+                 for (i = 0; i < mp->filepos_count; i++)
+                   message_comment_filepos (tmp, mp->filepos[i].file_name,
+                                            mp->filepos[i].line_number);
+                 /* Combine the flags of all contributors.  */
+                 if (!mp->is_fuzzy)
+                   tmp->is_fuzzy = false;
+                 if (mp->is_c_format == yes)
+                   tmp->is_c_format = yes;
+                 else if (mp->is_c_format == no
+                          && tmp->is_c_format == undecided)
+                   tmp->is_c_format = no;
+                 if (mp->do_wrap == no)
+                   tmp->do_wrap = no;
+                 if (!mp->obsolete)
+                   tmp->obsolete = false;
+               }
+           }
+       }
+    }
+  /* Turn the collected alternatives of each merged message into its final
+     msgstr.  */
+  for (k = 0; k < total_mdlp->nitems; k++)
+    {
+      message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+      for (j = 0; j < mlp->nitems; j++)
+       {
+         message_ty *tmp = mlp->item[j];
+
+         if (tmp->alternative_count > 0)
+           {
+             /* Test whether all alternative translations are equal.  */
+             struct altstr *first = &tmp->alternative[0];
+             size_t i;
+
+             for (i = 0; i < tmp->alternative_count; i++)
+               if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
+                     && memcmp (tmp->alternative[i].msgstr, first->msgstr,
+                                first->msgstr_len) == 0))
+                 break;
+
+             if (i == tmp->alternative_count)
+               {
+                 /* All alternatives are equal.  */
+                 tmp->msgstr = first->msgstr;
+                 tmp->msgstr_len = first->msgstr_len;
+               }
+             else
+               {
+                 /* Concatenate the alternative msgstrs into a single one,
+                    separated by markers.  */
+                 size_t len;
+                 const char *p;
+                 const char *p_end;
+                 char *new_msgstr;
+                 char *np;
+
+                 /* Compute an upper bound for the size of the result: all
+                    msgstr bytes, plus for every plural form of every
+                    alternative the id marker and two newlines.  */
+                 len = 0;
+                 for (i = 0; i < tmp->alternative_count; i++)
+                   {
+                     size_t id_len = strlen (tmp->alternative[i].id);
+
+                     len += tmp->alternative[i].msgstr_len;
+
+                     p = tmp->alternative[i].msgstr;
+                     p_end = tmp->alternative[i].msgstr_end;
+                     for (; p < p_end; p += strlen (p) + 1)
+                       len += id_len + 2;
+                   }
+
+                 new_msgstr = (char *) xmalloc (len);
+                 np = new_msgstr;
+                 for (;;)
+                   {
+                     /* Test whether there's one more plural form to
+                        process.  */
+                     for (i = 0; i < tmp->alternative_count; i++)
+                       if (tmp->alternative[i].msgstr
+                           < tmp->alternative[i].msgstr_end)
+                         break;
+                     if (i == tmp->alternative_count)
+                       break;
+
+                     /* Process next plural form.  */
+                     for (i = 0; i < tmp->alternative_count; i++)
+                       if (tmp->alternative[i].msgstr
+                           < tmp->alternative[i].msgstr_end)
+                         {
+                           /* Make sure the marker starts on a new line.  */
+                           if (np > new_msgstr && np[-1] != '\0'
+                               && np[-1] != '\n')
+                             *np++ = '\n';
+
+                           len = strlen (tmp->alternative[i].id);
+                           memcpy (np, tmp->alternative[i].id, len);
+                           np += len;
+                           *np++ = '\n';
+
+                           len = strlen (tmp->alternative[i].msgstr);
+                           memcpy (np, tmp->alternative[i].msgstr, len);
+                           np += len;
+                           /* Advance this alternative past the plural form
+                              just copied and its terminating NUL.  */
+                           tmp->alternative[i].msgstr += len + 1;
+                         }
+
+                     /* Plural forms are separated by NUL bytes.  */
+                     *np++ = '\0';
+                   }
+                 tmp->msgstr = new_msgstr;
+                 tmp->msgstr_len = np - new_msgstr;
+
+                 /* A msgstr containing markers needs manual editing.  */
+                 tmp->is_fuzzy = true;
+               }
+           }
+       }
+    }
+
+  return total_mdlp;
+}
diff --git a/src/msgl-cat.h b/src/msgl-cat.h

new file mode 100644 (file)

index 0000000..2cc2827
--- /dev/null
+++ b/src/msgl-cat.h
@@ -0,0 +1,37 @@
+/* Message list concatenation and duplicate handling.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _MSGL_CAT_H
+#define _MSGL_CAT_H
+
+#include "message.h"
+#include "str-list.h"
+
+/* These variables control which messages are selected.  */
+extern int more_than;
+extern int less_than;
+
+/* If true, use the first available translation.
+   If false, merge all available translations into one and fuzzy it.  */
+extern bool use_first;
+
+/* Read the PO files named in FILE_LIST and return a single message domain
+   list that combines their messages, filtered according to more_than,
+   less_than and use_first.
+   TO_CODE is the desired output encoding.  If it is NULL and the input
+   files use different encodings, the result is converted to UTF-8 and a
+   warning is issued.  */
+extern msgdomain_list_ty *
+       catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
+                                       const char *to_code));
+
+#endif /* _MSGL_CAT_H */
diff --git a/src/msguniq.c b/src/msguniq.c

new file mode 100644 (file)

index 0000000..3405691
--- /dev/null
+++ b/src/msguniq.c
@@ -0,0 +1,347 @@
+/* Remove, select or merge duplicate translations.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include "dir-list.h"
+#include "str-list.h"
+#include "error.h"
+#include "progname.h"
+#include "message.h"
+#include "read-po.h"
+#include "write-po.h"
+#include "msgl-cat.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Force output of PO file even if empty.  */
+static int force_po;
+
+/* Target encoding.  */
+static const char *to_code;
+
+/* Long options.
+   Entries with a non-NULL flag pointer store their value directly into the
+   variable; main sees them as option character '\0'.  The other entries
+   are mapped to the given option character.  CHAR_MAX + 1 is used for
+   --use-first, which is not listed in the short option string.  */
+static const struct option long_options[] =
+{
+  { "add-location", no_argument, &line_comment, 1 },
+  { "directory", required_argument, NULL, 'D' },
+  { "escape", no_argument, NULL, 'E' },
+  { "force-po", no_argument, &force_po, 1 },
+  { "help", no_argument, NULL, 'h' },
+  { "indent", no_argument, NULL, 'i' },
+  { "no-escape", no_argument, NULL, 'e' },
+  { "no-location", no_argument, &line_comment, 0 },
+  { "output-file", required_argument, NULL, 'o' },
+  { "repeated", no_argument, NULL, 'd' },
+  { "sort-by-file", no_argument, NULL, 'F' },
+  { "sort-output", no_argument, NULL, 's' },
+  { "strict", no_argument, NULL, 'S' },
+  { "to-code", required_argument, NULL, 't' },
+  { "unique", no_argument, NULL, 'u' },
+  { "use-first", no_argument, NULL, CHAR_MAX + 1 },
+  { "version", no_argument, NULL, 'V' },
+  { "width", required_argument, NULL, 'w', },
+  { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.  */
+static void usage PARAMS ((int status));
+
+
+/* Entry point of msguniq.
+   Parses the command line, reads at most one PO file (standard input if
+   none is given) with duplicate msgids allowed, lets
+   catenate_msgdomain_list select and merge the duplicates, optionally
+   sorts the result, and writes it as a PO file.  Exits with EXIT_SUCCESS
+   on the normal path; invalid options lead to usage (EXIT_FAILURE) or
+   error (EXIT_FAILURE, ...).  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optchar;
+  bool do_help;
+  bool do_version;
+  char *output_file;
+  const char *input_file;
+  string_list_ty *file_list;
+  msgdomain_list_ty *result;
+  bool sort_by_msgid = false;
+  bool sort_by_filepos = false;
+
+  /* Set program name for messages.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Set default values for variables.  */
+  do_help = false;
+  do_version = false;
+  output_file = NULL;
+  input_file = NULL;
+  /* By default every occurrence count is selected.  */
+  more_than = 0;
+  less_than = INT_MAX;
+  use_first = false;
+
+  while ((optchar = getopt_long (argc, argv, "dD:eEFhino:st:uVw:",
+                                long_options, NULL)) != EOF)
+    switch (optchar)
+      {
+      case '\0':               /* Long option.  */
+       break;
+
+      case 'd':
+       /* --repeated: select only messages occurring more than once.  */
+        more_than = 1;
+       less_than = INT_MAX;
+       break;
+
+      case 'D':
+       dir_list_append (optarg);
+       break;
+
+      case 'e':
+       message_print_style_escape (false);
+       break;
+
+      case 'E':
+       message_print_style_escape (true);
+       break;
+
+      case 'F':
+       sort_by_filepos = true;
+        break;
+
+      case 'h':
+       do_help = true;
+       break;
+
+      case 'i':
+       message_print_style_indent ();
+       break;
+
+      case 'n':
+       line_comment = 1;
+       break;
+
+      case 'o':
+       output_file = optarg;
+       break;
+
+      case 's':
+       sort_by_msgid = true;
+       break;
+
+      case 'S':
+       message_print_style_uniforum ();
+       break;
+
+      case 't':
+       to_code = optarg;
+       break;
+
+      case 'u':
+       /* --unique: select only messages occurring exactly once.  */
+        more_than = 0;
+       less_than = 2;
+        break;
+
+      case 'V':
+       do_version = true;
+       break;
+
+      case 'w':
+       {
+         int value;
+         char *endp;
+         value = strtol (optarg, &endp, 10);
+         /* An argument that does not start with a number is silently
+            ignored.  */
+         if (endp != optarg)
+           message_page_width_set (value);
+       }
+       break;
+
+      case CHAR_MAX + 1:
+       use_first = true;
+       break;
+
+      default:
+       usage (EXIT_FAILURE);
+       /* NOTREACHED */
+      }
+
+  /* Verify selected options.  */
+  if (!line_comment && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+          "--no-location", "--sort-by-file");
+
+  if (sort_by_msgid && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+          "--sort-output", "--sort-by-file");
+
+  /* Version information requested.  */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions.  There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+             "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Test whether we have a .po file name as argument.  */
+  if (optind == argc)
+    input_file = "-";
+  else if (optind + 1 == argc)
+    input_file = argv[optind];
+  else
+    {
+      /* Status EXIT_SUCCESS here only prints the message; the exit happens
+         in usage.  */
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
+      usage (EXIT_FAILURE);
+    }
+
+  /* Determine list of files we have to process: a single file.  */
+  file_list = string_list_alloc ();
+  string_list_append (file_list, input_file);
+
+  /* Read input files, then filter, convert and merge messages.  */
+  allow_duplicates = true;
+  result = catenate_msgdomain_list (file_list, to_code);
+
+  string_list_free (file_list);
+
+  /* Sorting the list of messages.  */
+  if (sort_by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (sort_by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Write the PO file.  */
+  msgdomain_list_print (result, output_file, force_po, false);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Display usage information and exit.
+   If STATUS is not EXIT_SUCCESS, only a short hint pointing to --help is
+   written to standard error.  Otherwise the complete help text is written
+   to standard output.  In both cases the program then exits with STATUS;
+   this function does not return.  */
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+            program_name);
+  else
+    {
+      /* xgettext: no-wrap */
+      printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Unifies duplicate translations in a translation catalog.\n\
+Finds duplicate translations of the same message ID.  Such duplicates are\n\
+invalid input for other programs like msgfmt, msgmerge or msgcat.  By\n\
+default, duplicates are merged together.  When using the --repeated option,\n\
+only duplicates are output, and all other messages are discarded.  Comments\n\
+and extracted comments will be cumulated, except that if --use-first is\n\
+specified, they will be taken from the first translation.  File positions\n\
+will be cumulated.  When using the --unique option, duplicates are discarded.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Input file location:\n\
+  INPUTFILE                   input PO file\n\
+  -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output file location:\n\
+  -o, --output-file=FILE         write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Message selection:\n\
+  -d, --repeated                 print only duplicates\n\
+  -u, --unique                   print only unique messages, discard duplicates\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output details:\n\
+  -t, --to-code=NAME             encoding for output\n\
+      --use-first                use first available translation for each\n\
+                                 message, don't merge several translations\n\
+  -e, --no-escape                do not use C escapes in output (default)\n\
+  -E, --escape                   use C escapes in output, no extended chars\n\
+      --force-po                 write PO file even if empty\n\
+  -i, --indent                   write the .po file using indented style\n\
+      --no-location              do not write '#: filename:line' lines\n\
+  -n, --add-location             generate '#: filename:line' lines (default)\n\
+      --strict                   write out strict Uniforum conforming .po file\n\
+  -w, --width=NUMBER             set output page width\n\
+  -s, --sort-output              generate sorted output and remove duplicates\n\
+  -F, --sort-by-file             sort output by file location\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Informative output:\n\
+  -h, --help                     display this help and exit\n\
+  -V, --version                  output version information and exit\n\
+"));
+      printf ("\n");
+      fputs (_("Report bugs to <bug-gnu-utils@gnu.org>.\n"),
+            stdout);
+    }
+
+  exit (status);
+}
+
diff --git a/src/read-po.c b/src/read-po.c

index 505d47ab9c7801a8db6599e05a9049c01cdb285c..c736f617d41a164421a91a27670e3ca9ed525eeb 100644 (file)
--- a/src/read-po.c
+++ b/src/read-po.c
@@ -35,6 +35,11 @@
     msgid, if present in the reference input.  Defaults to true.  */
  int line_comment = 1;
  
+/* If false, duplicate msgids in the same domain and file generate an error.
+   If true, duplicates of a nonempty msgid are allowed and left to the caller;
+   a duplicate empty msgid is still an error.  Defaults to false.  */
+bool allow_duplicates = false;
+
  
  /* This structure defines a derived class of the po_ty class.  (See
     po.h for an explanation.)  */
@@ -177,8 +182,13 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
    /* Select the appropriate sublist of this->mdlp.  */
    this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, 1);
  
-  /* See if this message ID has been seen before.  */
-  mp = message_list_search (this->mlp, msgid);
+  if (allow_duplicates && msgid[0] != '\0')
+    /* Doesn't matter if this message ID has been seen before.  */
+    mp = NULL;
+  else
+    /* See if this message ID has been seen before.  */
+    mp = message_list_search (this->mlp, msgid);
+
    if (mp)
      {
        po_gram_error_at_line (msgid_pos, _("duplicate message definition"));
diff --git a/src/read-po.h b/src/read-po.h

index 4346750fb3bcb1a1727350e77f7fa4609f95c24c..4e3b3c52d2c94a71b3b8aec18b1d85f11fef978c 100644 (file)
--- a/src/read-po.h
+++ b/src/read-po.h
@@ -25,6 +25,11 @@
     msgid, if present in the reference input.  Defaults to true.  */
  extern int line_comment;
  
+/* If false, duplicate msgids in the same domain and file generate an error.
+   If true, duplicates of a nonempty msgid are allowed and left to the caller;
+   a duplicate empty msgid is still an error.  Defaults to false.  */
+extern bool allow_duplicates;
+
  /* Read the input file with the name INPUT_NAME.  The ending .po is added
     if necessary.  If INPUT_NAME is not an absolute file name and the file is
     not found, the list of directories in "dir-list.h" is searched.  Returns
author	Bruno Haible <bruno@clisp.org>
	Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 29 Jul 2001 14:55:31 +0000 (14:55 +0000)
man/ChangeLog		patch \| blob \| blame \| history
man/Makefile.am		patch \| blob \| blame \| history
man/msguniq.x	[new file with mode: 0644]	patch \| blob
src/ChangeLog		patch \| blob \| blame \| history
src/FILES		patch \| blob \| blame \| history
src/Makefile.am		patch \| blob \| blame \| history
src/msgcat.c		patch \| blob \| blame \| history
src/msgl-cat.c	[new file with mode: 0644]	patch \| blob
src/msgl-cat.h	[new file with mode: 0644]	patch \| blob
src/msguniq.c	[new file with mode: 0644]	patch \| blob
src/read-po.c		patch \| blob \| blame \| history
src/read-po.h		patch \| blob \| blame \| history