Refactor: Librarify the core of msgmerge.

author Bruno Haible <bruno@clisp.org>

Sun, 12 Oct 2025 00:12:13 +0000 (02:12 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 12 Oct 2025 00:12:49 +0000 (02:12 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 12 Oct 2025 00:12:13 +0000 (02:12 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 12 Oct 2025 00:12:49 +0000 (02:12 +0200)
diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in

index 2de342158d0ac269999baa664e49d90b9eadab90..42d80f67db16a360b267aeac4157ea58afddb238 100644 (file)
--- a/gettext-tools/po/POTFILES.in
+++ b/gettext-tools/po/POTFILES.in
@@ -65,6 +65,7 @@ src/msgl-cat.c
  src/msgl-charset.c
  src/msgl-check.c
  src/msgl-iconv.c
+src/msgl-merge.c
  src/msgmerge.c
  src/msgunfmt.c
  src/msguniq.c
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index 997146bf88fdd1e98b697d892eff2e69604323e6..54d6ecaf2cf3a5af6342e03423730c9496f9f42b 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -149,6 +149,9 @@ msgl-charset.c
  | plural-count.h
  | plural-count.c
  |               Extracting the plural count of a header of a lists-of-messages.
+| msgl-merge.h
+| msgl-merge.c
+|               Merging a definitions file with a references file.
  | msgmerge.c
  |               Main source for the 'msgmerge' program.
  |
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index 93469a11c412b5d37413b41ad82f7639b0c84a34..797801e1cdee813b3e1cf2a4756f0511caf1dbea 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -54,7 +54,8 @@ noinst_HEADERS = \
    dir-list.h file-list.h read-po-gram.h cldr-plural.h \
    cldr-plural-exp.h locating-rules.h its.h search-path.h \
    msgl-charset.h msgl-equal.h msgl-iconv.h msgl-ascii.h msgl-ofn.h msgl-cat.h \
-  msgl-header.h msgl-english.h msgl-check.h msgl-fsearch.h msgfmt.h msgunfmt.h \
+  msgl-header.h msgl-english.h msgl-check.h msgl-fsearch.h msgl-merge.h \
+  msgfmt.h msgunfmt.h \
    plural-count.h plural-eval.h plural-distrib.h \
    read-mo.h write-mo.h \
    read-java.h write-java.h \
@@ -304,7 +305,7 @@ msgmerge_SOURCES = msgmerge.c
  else
  msgmerge_SOURCES = ../woe32dll/c++msgmerge.cc
  endif
-msgmerge_SOURCES += msgl-fsearch.c lang-table.c plural-count.c
+msgmerge_SOURCES += msgl-fsearch.c msgl-merge.c lang-table.c plural-count.c
  msgunfmt_SOURCES = msgunfmt.c
  msgunfmt_SOURCES += \
    read-mo.c read-java.c read-csharp.c read-resources.c read-tcl.c \
diff --git a/gettext-tools/src/msgl-merge.c b/gettext-tools/src/msgl-merge.c

new file mode 100644 (file)

index 0000000..73ea54d
--- /dev/null
+++ b/gettext-tools/src/msgl-merge.c
@@ -0,0 +1,1514 @@
+/* Merging a .po file with a .pot file.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Peter Miller, Ulrich Drepper, and Bruno Haible.  */
+
+#include <config.h>
+#include <alloca.h>
+
+/* Specification.  */
+#include "msgl-merge.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "read-catalog-file.h"
+#include "msgl-charset.h"
+#include "format.h"
+#include "xalloc.h"
+#include "xmalloca.h"
+#include "obstack.h"
+#include "c-strstr.h"
+#include "c-strcase.h"
+#include "po-charset.h"
+#include "msgl-iconv.h"
+#include "msgl-fsearch.h"
+#include "glthread/lock.h"
+#include "lang-table.h"
+#include "plural-exp.h"
+#include "plural-count.h"
+#include "msgl-check.h"
+#include "po-xerror.h"
+#include "xerror-handler.h"
+#include "xvasprintf.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+
+/* If true do not print unneeded messages.  */
+bool quiet;
+
+/* Verbosity level.  */
+int verbosity_level;
+
+/* Apply the .pot file to each of the domains in the PO file.  */
+bool multi_domain_mode = false;
+
+/* Produce output for msgfmt, not for a translator.
+   msgfmt ignores
+     - untranslated messages,
+     - fuzzy messages, except the header entry,
+     - obsolete messages.
+   Therefore output for msgfmt does not need to include such messages.  */
+bool for_msgfmt = false;
+
+/* Determines whether to use fuzzy matching.  */
+bool use_fuzzy_matching = true;
+
+/* Determines whether to keep old msgids as previous msgids.  */
+bool keep_previous = false;
+
+/* Language (ISO-639 code) and optional territory (ISO-3166 code).  */
+const char *catalogname = NULL;
+
+/* List of user-specified compendiums.  */
+message_list_list_ty *compendiums;
+
+/* List of corresponding filenames.  */
+string_list_ty *compendium_filenames;
+
+
+struct statistics
+{
+  size_t merged;
+  size_t fuzzied;
+  size_t missing;
+  size_t obsolete;
+};
+
+
+/* Data structure representing the messages with known translations.
+   They are composed of
+     - A message list from def.po,
+     - The compendiums.
+   The data structure is optimized for exact and fuzzy searches.  */
+typedef struct definitions_ty definitions_ty;
+struct definitions_ty
+{
+  /* A list of message lists.  The first comes from def.po, the other ones
+     from the compendiums.  Each message list has a built-in hash table,
+     for speed when doing the exact searches.  */
+  message_list_list_ty *lists;
+
+  /* A fuzzy index of the current list of non-compendium messages, for speed
+     when doing fuzzy searches.  Used only if use_fuzzy_matching is true.  */
+  message_fuzzy_index_ty *curr_findex;
+  /* A once-only execution guard for the initialization of the fuzzy index.
+     Needed for OpenMP.  */
+  gl_lock_define(, curr_findex_init_lock)
+
+  /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
+     Used only if use_fuzzy_matching is true and compendiums != NULL.  */
+  message_fuzzy_index_ty *comp_findex;
+  /* A once-only execution guard for the initialization of the fuzzy index.
+     Needed for OpenMP.  */
+  gl_lock_define(, comp_findex_init_lock)
+
+  /* The canonical encoding of the definitions and the compendiums.
+     Only used for fuzzy matching.  */
+  const char *canon_charset;
+};
+
+static inline void
+definitions_init (definitions_ty *definitions, const char *canon_charset)
+{
+  definitions->lists = message_list_list_alloc ();
+  message_list_list_append (definitions->lists, NULL);
+  if (compendiums != NULL)
+    message_list_list_append_list (definitions->lists, compendiums);
+  definitions->curr_findex = NULL;
+  gl_lock_init (definitions->curr_findex_init_lock);
+  definitions->comp_findex = NULL;
+  gl_lock_init (definitions->comp_findex_init_lock);
+  definitions->canon_charset = canon_charset;
+}
+
+/* Return the current list of non-compendium messages.  */
+static inline message_list_ty *
+definitions_current_list (const definitions_ty *definitions)
+{
+  return definitions->lists->item[0];
+}
+
+/* Set the current list of non-compendium messages.  */
+static inline void
+definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
+{
+  definitions->lists->item[0] = mlp;
+  if (definitions->curr_findex != NULL)
+    {
+      message_fuzzy_index_free (definitions->curr_findex);
+      definitions->curr_findex = NULL;
+    }
+}
+
+/* Create the fuzzy index for the current list of non-compendium messages.
+   Used only if use_fuzzy_matching is true.  */
+static inline void
+definitions_init_curr_findex (definitions_ty *definitions)
+{
+  /* Protect against concurrent execution.  */
+  gl_lock_lock (definitions->curr_findex_init_lock);
+  if (definitions->curr_findex == NULL)
+    definitions->curr_findex =
+      message_fuzzy_index_alloc (definitions_current_list (definitions),
+                                 definitions->canon_charset);
+  gl_lock_unlock (definitions->curr_findex_init_lock);
+}
+
+/* Create the fuzzy index for the compendium messages.
+   Used only if use_fuzzy_matching is true and compendiums != NULL.  */
+static inline void
+definitions_init_comp_findex (definitions_ty *definitions)
+{
+  /* Protect against concurrent execution.  */
+  gl_lock_lock (definitions->comp_findex_init_lock);
+  if (definitions->comp_findex == NULL)
+    {
+      /* Combine all the compendium message lists into a single one.  Don't
+         bother checking for duplicates.  */
+      message_list_ty *all_compendium;
+      size_t i;
+
+      all_compendium = message_list_alloc (false);
+      for (i = 0; i < compendiums->nitems; i++)
+        {
+          message_list_ty *mlp = compendiums->item[i];
+          size_t j;
+
+          for (j = 0; j < mlp->nitems; j++)
+            message_list_append (all_compendium, mlp->item[j]);
+        }
+
+      /* Create the fuzzy index from it.  */
+      definitions->comp_findex =
+        message_fuzzy_index_alloc (all_compendium, definitions->canon_charset);
+    }
+  gl_lock_unlock (definitions->comp_findex_init_lock);
+}
+
+/* Exact search.  */
+static inline message_ty *
+definitions_search (const definitions_ty *definitions,
+                    const char *msgctxt, const char *msgid)
+{
+  return message_list_list_search (definitions->lists, msgctxt, msgid);
+}
+
+/* Fuzzy search.
+   Used only if use_fuzzy_matching is true.  */
+static inline message_ty *
+definitions_search_fuzzy (definitions_ty *definitions,
+                          const char *msgctxt, const char *msgid)
+{
+  message_ty *mp1;
+
+  if (false)
+    {
+      /* Old, slow code.  */
+      mp1 =
+        message_list_search_fuzzy (definitions_current_list (definitions),
+                                   msgctxt, msgid);
+    }
+  else
+    {
+      /* Speedup through early abort in fstrcmp(), combined with pre-sorting
+         of the messages through a hashed index.  */
+      /* Create the fuzzy index lazily.  */
+      if (definitions->curr_findex == NULL)
+        definitions_init_curr_findex (definitions);
+      mp1 = message_fuzzy_index_search (definitions->curr_findex,
+                                        msgctxt, msgid,
+                                        FUZZY_THRESHOLD, false);
+    }
+
+  if (compendiums != NULL)
+    {
+      double lower_bound_for_mp2;
+      message_ty *mp2;
+
+      lower_bound_for_mp2 =
+        (mp1 != NULL
+         ? fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)
+         : FUZZY_THRESHOLD);
+      /* This lower bound must be >= FUZZY_THRESHOLD.  */
+      if (!(lower_bound_for_mp2 >= FUZZY_THRESHOLD))
+        abort ();
+
+      /* Create the fuzzy index lazily.  */
+      if (definitions->comp_findex == NULL)
+        definitions_init_comp_findex (definitions);
+
+      mp2 = message_fuzzy_index_search (definitions->comp_findex,
+                                        msgctxt, msgid,
+                                        lower_bound_for_mp2, true);
+
+      /* Choose the best among mp1, mp2.  */
+      if (mp1 == NULL
+          || (mp2 != NULL
+              && (fuzzy_search_goal_function (mp2, msgctxt, msgid,
+                                              lower_bound_for_mp2)
+                  > lower_bound_for_mp2)))
+        mp1 = mp2;
+    }
+
+  return mp1;
+}
+
+static inline void
+definitions_destroy (definitions_ty *definitions)
+{
+  message_list_list_free (definitions->lists, 2);
+  if (definitions->curr_findex != NULL)
+    message_fuzzy_index_free (definitions->curr_findex);
+  if (definitions->comp_findex != NULL)
+    message_fuzzy_index_free (definitions->comp_findex);
+}
+
+
+/* A silent error logger.  We are only interested in knowing whether errors
+   occurred at all.  */
+static void
+silent_error_logger (void *data, const char *format, ...)
+#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
+     __attribute__ ((__format__ (__printf__, 2, 3)))
+#endif
+;
+static void
+silent_error_logger (void *data, const char *format, ...)
+{
+}
+
+
+/* Another silent error logger.  */
+static void
+silent_xerror (int severity,
+               const struct message_ty *message,
+               const char *filename, size_t lineno, size_t column,
+               int multiline_p, const char *message_text)
+{
+}
+static void
+silent_xerror2 (int severity,
+                const struct message_ty *message1,
+                const char *filename1, size_t lineno1, size_t column1,
+                int multiline_p1, const char *message_text1,
+                const struct message_ty *message2,
+                const char *filename2, size_t lineno2, size_t column2,
+                int multiline_p2, const char *message_text2)
+{
+}
+
+static message_ty *
+message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
+               const struct plural_distribution *distribution)
+{
+  const char *msgstr;
+  size_t msgstr_len;
+  const char *prev_msgctxt;
+  const char *prev_msgid;
+  const char *prev_msgid_plural;
+  message_ty *result;
+  size_t j;
+
+  /* Take the msgid from the reference.  When fuzzy matches are made,
+     the definition will not be unique, but the reference will be -
+     usually because it has only been slightly changed.  */
+
+  /* Take the msgstr from the definition.  The msgstr of the reference
+     is usually empty, as it was generated by xgettext.  If we currently
+     process the header entry we have to merge the msgstr by using the
+     Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
+  if (is_header (ref))
+    {
+      /* Oh, oh.  The header entry and we have something to fill in.  */
+      static const struct
+      {
+        const char *name;
+        size_t len;
+      } known_fields[] =
+      {
+        { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
+#define PROJECT_ID              0
+        { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
+#define REPORT_MSGID_BUGS_TO    1
+        { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
+#define POT_CREATION_DATE       2
+        { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
+#define PO_REVISION_DATE        3
+        { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
+#define LAST_TRANSLATOR         4
+        { "Language-Team:", sizeof ("Language-Team:") - 1 },
+#define LANGUAGE_TEAM           5
+        { "Language:", sizeof ("Language:") - 1 },
+#define LANGUAGE                6
+        { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
+#define MIME_VERSION            7
+        { "Content-Type:", sizeof ("Content-Type:") - 1 },
+#define CONTENT_TYPE            8
+        { "Content-Transfer-Encoding:",
+          sizeof ("Content-Transfer-Encoding:") - 1 }
+#define CONTENT_TRANSFER        9
+      };
+#define UNKNOWN 10
+      struct
+      {
+        const char *string;
+        size_t len;
+      } header_fields[UNKNOWN + 1];
+      struct obstack pool;
+      const char *cp;
+      char *newp;
+
+      /* Clear all fields.  */
+      memset (header_fields, '\0', sizeof (header_fields));
+
+      /* Prepare a temporary memory pool.  */
+      obstack_init (&pool);
+
+      cp = def->msgstr;
+      while (*cp != '\0')
+        {
+          const char *endp = strchr (cp, '\n');
+          int terminated = endp != NULL;
+          size_t len;
+          size_t cnt;
+
+          if (!terminated)
+            {
+              /* Add a trailing newline.  */
+              char *copy;
+              endp = strchr (cp, '\0');
+
+              len = endp - cp + 1;
+
+              copy = (char *) obstack_alloc (&pool, len + 1);
+              stpcpy (stpcpy (copy, cp), "\n");
+              cp = copy;
+            }
+          else
+            {
+              len = (endp - cp) + 1;
+              ++endp;
+            }
+
+          /* Compare with any of the known fields.  */
+          for (cnt = 0;
+               cnt < sizeof (known_fields) / sizeof (known_fields[0]);
+               ++cnt)
+            if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
+                == 0)
+              break;
+
+          if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
+            {
+              header_fields[cnt].string = &cp[known_fields[cnt].len];
+              header_fields[cnt].len = len - known_fields[cnt].len;
+            }
+          else
+            {
+              /* It's an unknown field.  Append content to what is already
+                 known.  */
+              char *extended =
+                (char *) obstack_alloc (&pool,
+                                        header_fields[UNKNOWN].len + len + 1);
+              if (header_fields[UNKNOWN].string)
+                memcpy (extended, header_fields[UNKNOWN].string,
+                        header_fields[UNKNOWN].len);
+              memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
+              extended[header_fields[UNKNOWN].len + len] = '\0';
+              header_fields[UNKNOWN].string = extended;
+              header_fields[UNKNOWN].len += len;
+            }
+
+          cp = endp;
+        }
+
+      /* Set the Language field if specified on the command line.  */
+      if (catalogname != NULL)
+        {
+          /* Prepend a space and append a newline.  */
+          size_t len = strlen (catalogname);
+          char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
+          stpcpy (stpcpy (stpcpy (copy, " "), catalogname), "\n");
+          header_fields[LANGUAGE].string = copy;
+          header_fields[LANGUAGE].len = strlen (header_fields[LANGUAGE].string);
+        }
+      /* Add a Language field to PO files that don't have one.  The Language
+         field was introduced in gettext-0.18.  */
+      else if (header_fields[LANGUAGE].string == NULL)
+        {
+          const char *language_team_ptr = header_fields[LANGUAGE_TEAM].string;
+
+          if (language_team_ptr != NULL)
+            {
+              size_t language_team_len = header_fields[LANGUAGE_TEAM].len;
+
+              /* Trim leading blanks.  */
+              while (language_team_len > 0
+                     && (*language_team_ptr == ' '
+                         || *language_team_ptr == '\t'))
+                {
+                  language_team_ptr++;
+                  language_team_len--;
+                }
+
+              /* Trim trailing blanks.  */
+              while (language_team_len > 0
+                     && (language_team_ptr[language_team_len - 1] == ' '
+                         || language_team_ptr[language_team_len - 1] == '\t'))
+                language_team_len--;
+
+              /* Trim last word, if it looks like an URL or email address.  */
+              {
+                size_t i;
+
+                for (i = language_team_len; i > 0; i--)
+                  if (language_team_ptr[i - 1] == ' '
+                      || language_team_ptr[i - 1] == '\t')
+                    break;
+                /* The last word: language_team_ptr[i..language_team_len-1].  */
+                if (i < language_team_len
+                    && (language_team_ptr[i] == '<'
+                        || language_team_ptr[language_team_len - 1] == '>'
+                        || memchr (language_team_ptr, '@', language_team_len)
+                           != NULL
+                        || memchr (language_team_ptr, '/', language_team_len)
+                           != NULL))
+                  {
+                    /* Trim last word and blanks before it.  */
+                    while (i > 0
+                           && (language_team_ptr[i - 1] == ' '
+                               || language_team_ptr[i - 1] == '\t'))
+                      i--;
+                    language_team_len = i;
+                  }
+              }
+
+              /* The rest of the Language-Team field should be the english name
+                 of the language.  Convert to ISO 639 and ISO 3166 syntax.  */
+              {
+                size_t i;
+
+                for (i = 0; i < language_variant_table_size; i++)
+                  if (strlen (language_variant_table[i].english)
+                      == language_team_len
+                      && memcmp (language_variant_table[i].english,
+                                 language_team_ptr, language_team_len) == 0)
+                    {
+                      header_fields[LANGUAGE].string =
+                        language_variant_table[i].code;
+                      break;
+                    }
+              }
+              if (header_fields[LANGUAGE].string == NULL)
+                {
+                  size_t i;
+
+                  for (i = 0; i < language_table_size; i++)
+                    if (strlen (language_table[i].english) == language_team_len
+                        && memcmp (language_table[i].english,
+                                   language_team_ptr, language_team_len) == 0)
+                      {
+                        header_fields[LANGUAGE].string = language_table[i].code;
+                        break;
+                      }
+                }
+              if (header_fields[LANGUAGE].string != NULL)
+                {
+                  /* Prepend a space and append a newline.  */
+                  const char *str = header_fields[LANGUAGE].string;
+                  size_t len = strlen (str);
+                  char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
+                  stpcpy (stpcpy (stpcpy (copy, " "), str), "\n");
+                  header_fields[LANGUAGE].string = copy;
+                }
+              else
+                header_fields[LANGUAGE].string = " \n";
+              header_fields[LANGUAGE].len =
+                strlen (header_fields[LANGUAGE].string);
+            }
+        }
+
+      {
+        const char *msgid_bugs_ptr;
+
+        msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
+        if (msgid_bugs_ptr != NULL)
+          {
+            size_t msgid_bugs_len;
+            const char *endp;
+
+            msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
+
+            endp = strchr (msgid_bugs_ptr, '\n');
+            if (endp == NULL)
+              {
+                /* Add a trailing newline.  */
+                char *extended;
+                endp = strchr (msgid_bugs_ptr, '\0');
+                msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
+                extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
+                stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
+                msgid_bugs_ptr = extended;
+              }
+            else
+              msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
+
+            header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
+            header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
+          }
+      }
+
+      {
+        const char *pot_date_ptr;
+
+        pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
+        if (pot_date_ptr != NULL)
+          {
+            size_t pot_date_len;
+            const char *endp;
+
+            pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
+
+            endp = strchr (pot_date_ptr, '\n');
+            if (endp == NULL)
+              {
+                /* Add a trailing newline.  */
+                char *extended;
+                endp = strchr (pot_date_ptr, '\0');
+                pot_date_len = (endp - pot_date_ptr) + 1;
+                extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
+                stpcpy (stpcpy (extended, pot_date_ptr), "\n");
+                pot_date_ptr = extended;
+              }
+            else
+              pot_date_len = (endp - pot_date_ptr) + 1;
+
+            header_fields[POT_CREATION_DATE].string = pot_date_ptr;
+            header_fields[POT_CREATION_DATE].len = pot_date_len;
+          }
+      }
+
+      /* Concatenate all the various fields.  */
+      {
+        size_t len;
+        size_t cnt;
+
+        len = 0;
+        for (cnt = 0; cnt < UNKNOWN; ++cnt)
+          if (header_fields[cnt].string != NULL)
+            len += known_fields[cnt].len + header_fields[cnt].len;
+        len += header_fields[UNKNOWN].len;
+
+        cp = newp = XNMALLOC (len + 1, char);
+        newp[len] = '\0';
+      }
+
+#define IF_FILLED(idx)                                                        \
+      if (header_fields[idx].string)                                          \
+        newp = stpncpy (stpcpy (newp, known_fields[idx].name),                \
+                        header_fields[idx].string, header_fields[idx].len)
+
+      IF_FILLED (PROJECT_ID);
+      IF_FILLED (REPORT_MSGID_BUGS_TO);
+      IF_FILLED (POT_CREATION_DATE);
+      IF_FILLED (PO_REVISION_DATE);
+      IF_FILLED (LAST_TRANSLATOR);
+      IF_FILLED (LANGUAGE_TEAM);
+      IF_FILLED (LANGUAGE);
+      IF_FILLED (MIME_VERSION);
+      IF_FILLED (CONTENT_TYPE);
+      IF_FILLED (CONTENT_TRANSFER);
+      if (header_fields[UNKNOWN].string != NULL)
+        stpcpy (newp, header_fields[UNKNOWN].string);
+
+#undef IF_FILLED
+
+      /* Free the temporary memory pool.  */
+      obstack_free (&pool, NULL);
+
+      msgstr = cp;
+      msgstr_len = strlen (cp) + 1;
+
+      prev_msgctxt = NULL;
+      prev_msgid = NULL;
+      prev_msgid_plural = NULL;
+    }
+  else
+    {
+      msgstr = def->msgstr;
+      msgstr_len = def->msgstr_len;
+
+      if (def->is_fuzzy)
+        {
+          prev_msgctxt = def->prev_msgctxt;
+          prev_msgid = def->prev_msgid;
+          prev_msgid_plural = def->prev_msgid_plural;
+        }
+      else
+        {
+          prev_msgctxt = def->msgctxt;
+          prev_msgid = def->msgid;
+          prev_msgid_plural = def->msgid_plural;
+        }
+    }
+
+  result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
+                          xstrdup (ref->msgid), ref->msgid_plural,
+                          msgstr, msgstr_len, &def->pos);
+
+  /* Take the comments from the definition file.  There will be none at
+     all in the reference file, as it was generated by xgettext.  */
+  if (def->comment)
+    for (j = 0; j < def->comment->nitems; ++j)
+      message_comment_append (result, def->comment->item[j]);
+
+  /* Take the dot comments from the reference file, as they are
+     generated by xgettext.  Any in the definition file are old ones
+     collected by previous runs of xgettext and msgmerge.  */
+  if (ref->comment_dot)
+    for (j = 0; j < ref->comment_dot->nitems; ++j)
+      message_comment_dot_append (result, ref->comment_dot->item[j]);
+
+  /* The flags are mixed in a special way.  Some informations come
+     from the reference message (such as format/no-format), others
+     come from the definition file (fuzzy or not).  */
+  result->is_fuzzy = def->is_fuzzy | force_fuzzy;
+
+  /* If ref and def have the same msgid but different msgid_plural, it's
+     a reason to mark the result fuzzy.  */
+  if (!result->is_fuzzy
+      && (ref->msgid_plural != NULL
+          ? def->msgid_plural == NULL
+            || strcmp (ref->msgid_plural, def->msgid_plural) != 0
+          : def->msgid_plural != NULL))
+    result->is_fuzzy = true;
+
+  {
+    size_t i;
+
+    for (i = 0; i < NFORMATS; i++)
+      {
+        result->is_format[i] = ref->is_format[i];
+
+        /* If the reference message is marked as being a format specifier,
+           but the definition message is not, we check if the resulting
+           message would pass "msgfmt -c".  If yes, then all is fine.  If
+           not, we add a fuzzy marker, because
+           1. the message needs the translator's attention,
+           2. msgmerge must not transform a PO file which passes "msgfmt -c"
+              into a PO file which doesn't.  */
+        if (!result->is_fuzzy
+            && possible_format_p (ref->is_format[i])
+            && !possible_format_p (def->is_format[i])
+            && check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural,
+                                            msgstr, msgstr_len, i, ref->range,
+                                            distribution,
+                                            silent_error_logger, NULL)
+               > 0)
+          result->is_fuzzy = true;
+      }
+  }
+
+  result->range = ref->range;
+  /* If the definition message was assuming a certain range, but the reference
+     message does not specify a range any more or specifies a range that is
+     not the same or a subset, we add a fuzzy marker, because
+       1. the message needs the translator's attention,
+       2. msgmerge must not transform a PO file which passes "msgfmt -c"
+          into a PO file which doesn't.  */
+  if (!result->is_fuzzy
+      && has_range_p (def->range)
+      && !(has_range_p (ref->range)
+           && ref->range.min >= def->range.min
+           && ref->range.max <= def->range.max))
+    result->is_fuzzy = true;
+
+  result->do_wrap = ref->do_wrap;
+
+  /* Insert previous msgid, commented out with "#|".
+     Do so only when --previous is specified, for backward compatibility.
+     Since the "previous msgid" represents the original msgid that led to
+     the current msgstr,
+       - we can omit it if the resulting message is not fuzzy or is
+         untranslated (but do this in a later pass, since result->is_fuzzy
+         is not finalized at this point),
+       - otherwise, if the corresponding message from the definition file
+         was translated (not fuzzy), we use that message's msgid,
+       - otherwise, we use that message's prev_msgid.  */
+  if (keep_previous)
+    {
+      result->prev_msgctxt = prev_msgctxt;
+      result->prev_msgid = prev_msgid;
+      result->prev_msgid_plural = prev_msgid_plural;
+    }
+
+  /* If the reference message was obsolete, make the resulting message
+     obsolete.  This case doesn't occur for POT files, but users sometimes
+     use PO files that are themselves the result of msgmerge instead of POT
+     files.  */
+  result->obsolete = ref->obsolete;
+
+  /* Take the file position comments from the reference file, as they
+     are generated by xgettext.  Any in the definition file are old ones
+     collected by previous runs of xgettext and msgmerge.  */
+  for (j = 0; j < ref->filepos_count; ++j)
+    {
+      lex_pos_ty *pp = &ref->filepos[j];
+      message_comment_filepos (result, pp->file_name, pp->line_number);
+    }
+
+  /* Special postprocessing is needed if the reference message is a
+     plural form and the definition message isn't, or vice versa.  */
+  if (ref->msgid_plural != NULL)
+    {
+      if (def->msgid_plural == NULL)
+        result->used = 1;
+    }
+  else
+    {
+      if (def->msgid_plural != NULL)
+        result->used = 2;
+    }
+
+  /* All done, return the merged message to the caller.  */
+  return result;
+}
+
+
+#define DOT_FREQUENCY 10
+
+static void
+match_domain (const char *definitions_file_name,
+              const char *references_file_name,
+              definitions_ty *definitions, message_list_ty *refmlp,
+              message_list_ty *resultmlp,
+              struct statistics *stats, unsigned int *processed)
+{
+  {
+    unsigned long int nplurals;
+    char *untranslated_plural_msgstr;
+    struct plural_distribution distribution;
+    struct search_result { message_ty *found; bool fuzzy; } *search_results;
+    size_t j;
+
+    {
+      message_ty *header_entry;
+      const struct expression *plural_expr;
+
+      header_entry =
+        message_list_search (definitions_current_list (definitions), NULL, "");
+      extract_plural_expression (header_entry ? header_entry->msgstr : NULL,
+                                 &plural_expr, &nplurals);
+      untranslated_plural_msgstr = XNMALLOC (nplurals, char);
+      memset (untranslated_plural_msgstr, '\0', nplurals);
+
+      /* Determine the plural distribution of the plural_expr formula.  */
+      {
+        /* Disable error output temporarily.  */
+        unsigned int error_count = 0;
+        struct xerror_handler local_xerror_handler =
+          {
+            silent_xerror,
+            silent_xerror2,
+            &error_count
+          };
+
+        if (check_plural_eval (plural_expr, nplurals, header_entry,
+                               &distribution, &local_xerror_handler) > 0)
+          {
+            distribution.expr = NULL;
+            distribution.often = NULL;
+            distribution.often_length = 0;
+            distribution.histogram = NULL;
+          }
+      }
+    }
+
+    /* Most of the time is spent in definitions_search_fuzzy.
+       Perform it in a separate loop that can be parallelized by an OpenMP
+       capable compiler.  */
+    search_results = XNMALLOC (refmlp->nitems, struct search_result);
+    {
+      long int nn = refmlp->nitems;
+      long int jj;
+
+      /* Tell the OpenMP capable compiler to distribute this loop across
+         several threads.  The schedule is dynamic, because for some messages
+         the loop body can be executed very quickly, whereas for others it takes
+         a long time.
+         Note: The Sun Workshop 6.2 C compiler does not allow a space between
+         '#' and 'pragma'.  */
+      #ifdef _OPENMP
+       #pragma omp parallel for schedule(dynamic)
+      #endif
+      for (jj = 0; jj < nn; jj++)
+        {
+          message_ty *refmsg = refmlp->item[jj];
+          message_ty *defmsg;
+
+          /* Because merging can take a while we print something to signal
+             we are not dead.  */
+          if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
+            fputc ('.', stderr);
+          #ifdef _OPENMP
+           #pragma omp atomic
+          #endif
+          (*processed)++;
+
+          /* See if it is in the other file.  */
+          defmsg =
+            definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
+          if (defmsg != NULL)
+            {
+              search_results[jj].found = defmsg;
+              search_results[jj].fuzzy = false;
+            }
+          else if (!is_header (refmsg)
+                   /* If the message was not defined at all, try to find a very
+                      similar message, it could be a typo, or the suggestion may
+                      help.  */
+                   && use_fuzzy_matching
+                   && ((defmsg =
+                          definitions_search_fuzzy (definitions,
+                                                    refmsg->msgctxt,
+                                                    refmsg->msgid)) != NULL))
+            {
+              search_results[jj].found = defmsg;
+              search_results[jj].fuzzy = true;
+            }
+          else
+            search_results[jj].found = NULL;
+        }
+    }
+
+    for (j = 0; j < refmlp->nitems; j++)
+      {
+        message_ty *refmsg = refmlp->item[j];
+
+        /* See if it is in the other file.
+           This used definitions_search.  */
+        if (search_results[j].found != NULL && !search_results[j].fuzzy)
+          {
+            message_ty *defmsg = search_results[j].found;
+            /* Merge the reference with the definition: take the #. and
+               #: comments from the reference, take the # comments from
+               the definition, take the msgstr from the definition.  Add
+               this merged entry to the output message list.  */
+            message_ty *mp =
+              message_merge (defmsg, refmsg, false, &distribution);
+
+            /* When producing output for msgfmt, omit messages that are
+               untranslated or fuzzy (except the header entry).  */
+            if (!(for_msgfmt
+                  && (mp->msgstr[0] == '\0' /* untranslated? */
+                      || (mp->is_fuzzy && !is_header (mp))))) /* fuzzy? */
+              {
+                message_list_append (resultmlp, mp);
+
+                /* Remember that this message has been used, when we scan
+                   later to see if anything was omitted.  */
+                defmsg->used = 1;
+              }
+
+            stats->merged++;
+          }
+        else if (!is_header (refmsg))
+          {
+            /* If the message was not defined at all, try to find a very
+               similar message, it could be a typo, or the suggestion may
+               help.  This search assumed use_fuzzy_matching and used
+               definitions_search_fuzzy.  */
+            if (search_results[j].found != NULL && search_results[j].fuzzy)
+              {
+                message_ty *defmsg = search_results[j].found;
+                message_ty *mp;
+
+                if (verbosity_level > 1)
+                  po_xerror2 (PO_SEVERITY_ERROR,
+                              refmsg, NULL, 0, 0, false,
+                              _("this message is used but not defined"),
+                              defmsg, NULL, 0, 0, false,
+                              _("but this definition is similar"));
+
+                /* Merge the reference with the definition: take the #. and
+                   #: comments from the reference, take the # comments from
+                   the definition, take the msgstr from the definition.  Add
+                   this merged entry to the output message list.  */
+                mp = message_merge (defmsg, refmsg, true, &distribution);
+
+                message_list_append (resultmlp, mp);
+
+                /* Remember that this message has been used, when we scan
+                   later to see if anything was omitted.  */
+                defmsg->used = 1;
+
+                stats->fuzzied++;
+                if (!quiet && verbosity_level <= 1)
+                  /* Always print a dot if we handled a fuzzy match.  */
+                  fputc ('.', stderr);
+              }
+            else
+              {
+                message_ty *mp;
+                bool is_untranslated;
+                const char *p;
+                const char *pend;
+
+                if (verbosity_level > 1)
+                  po_xerror (PO_SEVERITY_ERROR, refmsg, NULL, 0, 0, false,
+                             xasprintf (
+                               _("this message is used but not defined in %s"),
+                               definitions_file_name));
+
+                mp = message_copy (refmsg);
+
+                /* Test if mp is untranslated.  (It most likely is.)  */
+                is_untranslated = true;
+                for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
+                  if (*p != '\0')
+                    {
+                      is_untranslated = false;
+                      break;
+                    }
+
+                if (mp->msgid_plural != NULL && is_untranslated)
+                  {
+                    /* Change mp->msgstr_len consecutive empty strings into
+                       nplurals consecutive empty strings.  */
+                    if (nplurals > mp->msgstr_len)
+                      mp->msgstr = untranslated_plural_msgstr;
+                    mp->msgstr_len = nplurals;
+                  }
+
+                /* When producing output for msgfmt, omit messages that are
+                   untranslated or fuzzy (except the header entry).  */
+                if (!(for_msgfmt && (is_untranslated || mp->is_fuzzy)))
+                  {
+                    message_list_append (resultmlp, mp);
+                  }
+
+                stats->missing++;
+              }
+          }
+      }
+
+    free (search_results);
+  }
+
+  /* Now postprocess the problematic merges.  This is needed because we
+     want the result to pass the "msgfmt -c -v" check.  */
+  {
+    /* message_merge sets mp->used to 1 or 2, depending on the problem.
+       Compute the bitwise OR of all these.  */
+    int problematic = 0;
+    size_t j;
+
+    for (j = 0; j < resultmlp->nitems; j++)
+      problematic |= resultmlp->item[j]->used;
+
+    if (problematic)
+      {
+        unsigned long int nplurals = 0;
+
+        if (problematic & 1)
+          {
+            /* Need to know nplurals of the result domain.  */
+            message_ty *header_entry =
+              message_list_search (resultmlp, NULL, "");
+
+            nplurals = get_plural_count (header_entry
+                                         ? header_entry->msgstr
+                                         : NULL);
+          }
+
+        for (j = 0; j < resultmlp->nitems; j++)
+          {
+            message_ty *mp = resultmlp->item[j];
+
+            if ((mp->used & 1) && (nplurals > 0))
+              {
+                /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
+                   Use a copy of def->msgstr for each possible plural form.  */
+                size_t new_msgstr_len;
+                char *new_msgstr;
+                char *p;
+                unsigned long i;
+
+                if (verbosity_level > 1)
+                  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                             _("this message should define plural forms"));
+
+                new_msgstr_len = nplurals * mp->msgstr_len;
+                new_msgstr = XNMALLOC (new_msgstr_len, char);
+                for (i = 0, p = new_msgstr; i < nplurals; i++)
+                  {
+                    memcpy (p, mp->msgstr, mp->msgstr_len);
+                    p += mp->msgstr_len;
+                  }
+                mp->msgstr = new_msgstr;
+                mp->msgstr_len = new_msgstr_len;
+                mp->is_fuzzy = true;
+              }
+
+            if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
+              {
+                /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
+                   Use only the first among the plural forms.  */
+
+                if (verbosity_level > 1)
+                  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                             _("this message should not define plural forms"));
+
+                mp->msgstr_len = strlen (mp->msgstr) + 1;
+                mp->is_fuzzy = true;
+              }
+
+            /* Postprocessing of this message is done.  */
+            mp->used = 0;
+          }
+      }
+  }
+
+  /* Now that mp->is_fuzzy is finalized for all messages, remove the
+     "previous msgid" information from all messages that are not fuzzy or
+     are untranslated.  */
+  {
+    size_t j;
+
+    for (j = 0; j < resultmlp->nitems; j++)
+      {
+        message_ty *mp = resultmlp->item[j];
+
+        if (!mp->is_fuzzy || mp->msgstr[0] == '\0')
+          {
+            mp->prev_msgctxt = NULL;
+            mp->prev_msgid = NULL;
+            mp->prev_msgid_plural = NULL;
+          }
+      }
+  }
+}
+
+msgdomain_list_ty *
+merge (const char *definitions_file_name, const char *references_file_name,
+       catalog_input_format_ty input_syntax,
+       msgdomain_list_ty **defp)
+{
+  msgdomain_list_ty *def;
+  msgdomain_list_ty *ref;
+  size_t j, k;
+  unsigned int processed;
+  struct statistics stats;
+  msgdomain_list_ty *result;
+  const char *def_canon_charset;
+  definitions_ty definitions;
+  message_list_ty *empty_list;
+
+  stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
+
+  /* This is the definitions file, created by a human.  */
+  def = read_catalog_file (definitions_file_name, input_syntax);
+
+  /* This is the references file, created by groping the sources with
+     the xgettext program.  */
+  ref = read_catalog_file (references_file_name, input_syntax);
+  check_pot_charset (ref, references_file_name);
+  /* Add a dummy header entry, if the references file contains none.  */
+  for (k = 0; k < ref->nitems; k++)
+    if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
+      {
+        static lex_pos_ty pos = { __FILE__, __LINE__ };
+        message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
+
+        message_list_prepend (ref->item[k]->messages, refheader);
+      }
+
+  /* The references file can be either in ASCII or in UTF-8.  If it is
+     in UTF-8, we have to convert the definitions and the compendiums to
+     UTF-8 as well.  */
+  {
+    bool was_utf8 = false;
+    for (k = 0; k < ref->nitems; k++)
+      {
+        message_list_ty *mlp = ref->item[k]->messages;
+
+        for (j = 0; j < mlp->nitems; j++)
+          if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+            {
+              const char *header = mlp->item[j]->msgstr;
+
+              if (header != NULL)
+                {
+                  const char *charsetstr = c_strstr (header, "charset=");
+
+                  if (charsetstr != NULL)
+                    {
+                      size_t len;
+
+                      charsetstr += strlen ("charset=");
+                      len = strcspn (charsetstr, " \t\n");
+                      if (len == strlen ("UTF-8")
+                          && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
+                        was_utf8 = true;
+                    }
+                }
+            }
+        }
+    if (was_utf8)
+      {
+        def = iconv_msgdomain_list (def, po_charset_utf8, true,
+                                    definitions_file_name,
+                                    textmode_xerror_handler);
+        if (compendiums != NULL)
+          for (k = 0; k < compendiums->nitems; k++)
+            iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
+                                compendium_filenames->item[k],
+                                textmode_xerror_handler);
+      }
+    else if (compendiums != NULL && compendiums->nitems > 0)
+      {
+        /* Ensure that the definitions and the compendiums are in the same
+           encoding.  Prefer the encoding of the definitions file, if
+           possible; otherwise, if the definitions file is empty and the
+           compendiums are all in the same encoding, use that encoding;
+           otherwise, use UTF-8.  */
+        bool conversion_done = false;
+        {
+          char *charset = NULL;
+
+          /* Get the encoding of the definitions file.  */
+          for (k = 0; k < def->nitems; k++)
+            {
+              message_list_ty *mlp = def->item[k]->messages;
+
+              for (j = 0; j < mlp->nitems; j++)
+                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+                  {
+                    const char *header = mlp->item[j]->msgstr;
+
+                    if (header != NULL)
+                      {
+                        const char *charsetstr = c_strstr (header, "charset=");
+
+                        if (charsetstr != NULL)
+                          {
+                            size_t len;
+
+                            charsetstr += strlen ("charset=");
+                            len = strcspn (charsetstr, " \t\n");
+                            charset = (char *) xmalloca (len + 1);
+                            memcpy (charset, charsetstr, len);
+                            charset[len] = '\0';
+                            break;
+                          }
+                      }
+                  }
+              if (charset != NULL)
+                break;
+            }
+          if (charset != NULL)
+            {
+              const char *canon_charset = po_charset_canonicalize (charset);
+
+              if (canon_charset != NULL)
+                {
+                  bool all_compendiums_iconvable = true;
+
+                  if (compendiums != NULL)
+                    for (k = 0; k < compendiums->nitems; k++)
+                      if (!is_message_list_iconvable (compendiums->item[k],
+                                                      NULL, canon_charset))
+                        {
+                          all_compendiums_iconvable = false;
+                          break;
+                        }
+
+                  if (all_compendiums_iconvable)
+                    {
+                      /* Convert the compendiums to def's encoding.  */
+                      if (compendiums != NULL)
+                        for (k = 0; k < compendiums->nitems; k++)
+                          iconv_message_list (compendiums->item[k],
+                                              NULL, canon_charset,
+                                              compendium_filenames->item[k],
+                                              textmode_xerror_handler);
+                      conversion_done = true;
+                    }
+                }
+              freea (charset);
+            }
+        }
+        if (!conversion_done)
+          {
+            if (def->nitems == 0
+                || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
+              {
+                /* The definitions file is empty.
+                   Compare the encodings of the compendiums.  */
+                const char *common_canon_charset = NULL;
+
+                for (k = 0; k < compendiums->nitems; k++)
+                  {
+                    message_list_ty *mlp = compendiums->item[k];
+                    char *charset = NULL;
+                    const char *canon_charset = NULL;
+
+                    for (j = 0; j < mlp->nitems; j++)
+                      if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+                        {
+                          const char *header = mlp->item[j]->msgstr;
+
+                          if (header != NULL)
+                            {
+                              const char *charsetstr =
+                                c_strstr (header, "charset=");
+
+                              if (charsetstr != NULL)
+                                {
+                                  size_t len;
+
+                                  charsetstr += strlen ("charset=");
+                                  len = strcspn (charsetstr, " \t\n");
+                                  charset = (char *) xmalloca (len + 1);
+                                  memcpy (charset, charsetstr, len);
+                                  charset[len] = '\0';
+
+                                  break;
+                                }
+                            }
+                        }
+                    if (charset != NULL)
+                      {
+                        canon_charset = po_charset_canonicalize (charset);
+                        freea (charset);
+                      }
+                    /* If no charset declaration was found in this file,
+                       or if it is not a valid encoding name, or if it
+                       differs from the common charset found so far,
+                       we have no common charset.  */
+                    if (canon_charset == NULL
+                        || (common_canon_charset != NULL
+                            && canon_charset != common_canon_charset))
+                      {
+                        common_canon_charset = NULL;
+                        break;
+                      }
+                    common_canon_charset = canon_charset;
+                  }
+
+                if (common_canon_charset != NULL)
+                  /* No conversion needed in this case.  */
+                  conversion_done = true;
+              }
+            if (!conversion_done)
+              {
+                /* It's too hairy to find out what would be the optimal target
+                   encoding.  So, convert everything to UTF-8.  */
+                def = iconv_msgdomain_list (def, po_charset_utf8, true,
+                                            definitions_file_name,
+                                            textmode_xerror_handler);
+                if (compendiums != NULL)
+                  for (k = 0; k < compendiums->nitems; k++)
+                    iconv_message_list (compendiums->item[k],
+                                        NULL, po_charset_utf8,
+                                        compendium_filenames->item[k],
+                                        textmode_xerror_handler);
+              }
+          }
+      }
+  }
+
+  /* Determine canonicalized encoding name of the definitions now, after
+     conversion.  Only used for fuzzy matching.  */
+  if (use_fuzzy_matching)
+    {
+      def_canon_charset = def->encoding;
+      if (def_canon_charset == NULL)
+        {
+          char *charset = NULL;
+
+          /* Get the encoding of the definitions file.  */
+          for (k = 0; k < def->nitems; k++)
+            {
+              message_list_ty *mlp = def->item[k]->messages;
+
+              for (j = 0; j < mlp->nitems; j++)
+                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+                  {
+                    const char *header = mlp->item[j]->msgstr;
+
+                    if (header != NULL)
+                      {
+                        const char *charsetstr = c_strstr (header, "charset=");
+
+                        if (charsetstr != NULL)
+                          {
+                            size_t len;
+
+                            charsetstr += strlen ("charset=");
+                            len = strcspn (charsetstr, " \t\n");
+                            charset = (char *) xmalloca (len + 1);
+                            memcpy (charset, charsetstr, len);
+                            charset[len] = '\0';
+                            break;
+                          }
+                      }
+                  }
+              if (charset != NULL)
+                break;
+            }
+          if (charset != NULL)
+            def_canon_charset = po_charset_canonicalize (charset);
+          if (def_canon_charset == NULL)
+            /* Unspecified encoding.  Assume unibyte encoding.  */
+            def_canon_charset = po_charset_ascii;
+        }
+    }
+  else
+    def_canon_charset = NULL;
+
+  /* Initialize and preprocess the total set of message definitions.  */
+  definitions_init (&definitions, def_canon_charset);
+  empty_list = message_list_alloc (false);
+
+  result = msgdomain_list_alloc (false);
+  processed = 0;
+
+  /* Every reference must be matched with its definition. */
+  if (!multi_domain_mode)
+    for (k = 0; k < ref->nitems; k++)
+      {
+        const char *domain = ref->item[k]->domain;
+        message_list_ty *refmlp = ref->item[k]->messages;
+        message_list_ty *resultmlp =
+          msgdomain_list_sublist (result, domain, true);
+        message_list_ty *defmlp;
+
+        defmlp = msgdomain_list_sublist (def, domain, false);
+        if (defmlp == NULL)
+          defmlp = empty_list;
+        definitions_set_current_list (&definitions, defmlp);
+
+        match_domain (definitions_file_name, references_file_name,
+                      &definitions, refmlp, resultmlp,
+                      &stats, &processed);
+      }
+  else
+    {
+      /* Apply the references messages in the default domain to each of
+         the definition domains.  */
+      message_list_ty *refmlp = ref->item[0]->messages;
+
+      for (k = 0; k < def->nitems; k++)
+        {
+          const char *domain = def->item[k]->domain;
+          message_list_ty *defmlp = def->item[k]->messages;
+
+          /* Ignore the default message domain if it has no messages.  */
+          if (k > 0 || defmlp->nitems > 0)
+            {
+              message_list_ty *resultmlp =
+                msgdomain_list_sublist (result, domain, true);
+
+              definitions_set_current_list (&definitions, defmlp);
+
+              match_domain (definitions_file_name, references_file_name,
+                            &definitions, refmlp, resultmlp,
+                            &stats, &processed);
+            }
+        }
+    }
+
+  definitions_destroy (&definitions);
+
+  if (!for_msgfmt)
+    {
+      /* Look for messages in the definition file, which are not present
+         in the reference file, indicating messages which defined but not
+         used in the program.  Don't scan the compendium(s).  */
+      for (k = 0; k < def->nitems; ++k)
+        {
+          const char *domain = def->item[k]->domain;
+          message_list_ty *defmlp = def->item[k]->messages;
+
+          for (j = 0; j < defmlp->nitems; j++)
+            {
+              message_ty *defmsg = defmlp->item[j];
+
+              if (!defmsg->used)
+                {
+                  /* Remember the old translation although it is not used anymore.
+                     But we mark it as obsolete.  */
+                  message_ty *mp;
+
+                  mp = message_copy (defmsg);
+                  /* Clear the extracted comments.  */
+                  if (mp->comment_dot != NULL)
+                    {
+                      string_list_free (mp->comment_dot);
+                      mp->comment_dot = NULL;
+                    }
+                  /* Clear the file position comments.  */
+                  if (mp->filepos != NULL)
+                    {
+                      size_t i;
+
+                      for (i = 0; i < mp->filepos_count; i++)
+                        free ((char *) mp->filepos[i].file_name);
+                      mp->filepos_count = 0;
+                      free (mp->filepos);
+                      mp->filepos = NULL;
+                    }
+                  /* Mark as obsolete.   */
+                  mp->obsolete = true;
+
+                  message_list_append (msgdomain_list_sublist (result, domain, true),
+                                       mp);
+                  stats.obsolete++;
+                }
+            }
+        }
+    }
+
+  /* Determine the known a-priori encoding, if any.  */
+  if (def->encoding == ref->encoding)
+    result->encoding = def->encoding;
+
+  /* Report some statistics.  */
+  if (verbosity_level > 0)
+    fprintf (stderr, _("%s\
+Read %ld old + %ld reference, \
+merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
+             !quiet && verbosity_level <= 1 ? "\n" : "",
+             (long) def->nitems, (long) ref->nitems,
+             (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
+             (long) stats.obsolete);
+  else if (!quiet)
+    fputs (_(" done.\n"), stderr);
+
+  /* Return results.  */
+  *defp = def;
+  return result;
+}
diff --git a/gettext-tools/src/msgl-merge.h b/gettext-tools/src/msgl-merge.h

new file mode 100644 (file)

index 0000000..566fca7
--- /dev/null
+++ b/gettext-tools/src/msgl-merge.h
@@ -0,0 +1,76 @@
+/* Merging a .po file with a .pot file.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Peter Miller, Ulrich Drepper, and Bruno Haible.  */
+
+#ifndef _MSGL_MERGE_H
+#define _MSGL_MERGE_H 1
+
+#include <stdbool.h>
+
+#include "message.h"
+#include "read-catalog-abstract.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* If true do not print unneeded messages.  */
+extern bool quiet;
+
+/* Verbosity level.  */
+extern int verbosity_level;
+
+/* Apply the .pot file to each of the domains in the PO file.  */
+extern bool multi_domain_mode;
+
+/* Produce output for msgfmt, not for a translator.
+   msgfmt ignores
+     - untranslated messages,
+     - fuzzy messages, except the header entry,
+     - obsolete messages.
+   Therefore output for msgfmt does not need to include such messages.  */
+extern bool for_msgfmt;
+
+/* Determines whether to use fuzzy matching.  */
+extern bool use_fuzzy_matching;
+
+/* Determines whether to keep old msgids as previous msgids.  */
+extern bool keep_previous;
+
+/* Language (ISO-639 code) and optional territory (ISO-3166 code).  */
+extern const char *catalogname;
+
+/* List of user-specified compendiums.  */
+extern message_list_list_ty *compendiums;
+
+/* List of corresponding filenames.  */
+extern string_list_ty *compendium_filenames;
+
+
+extern msgdomain_list_ty *
+       merge (const char *definitions_file_name,
+              const char *references_file_name,
+              catalog_input_format_ty input_syntax,
+              msgdomain_list_ty **defp);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MSGL_MERGE_H */
diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c

index 6ebc528beb140e1a66435c2130d5dc9d4cc392c8..37dc06eda34ca11fd5e63554ce33db313e59506b 100644 (file)
--- a/gettext-tools/src/msgmerge.c
+++ b/gettext-tools/src/msgmerge.c
@@ -16,7 +16,6 @@
     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  
  #include <config.h>
-#include <alloca.h>
  
  #include <limits.h>
  #include <stdbool.h>
@@ -48,25 +47,10 @@
  #include "write-po.h"
  #include "write-properties.h"
  #include "write-stringtable.h"
-#include "msgl-charset.h"
-#include "format.h"
  #include "xalloc.h"
-#include "xmalloca.h"
-#include "obstack.h"
-#include "c-strstr.h"
-#include "c-strcase.h"
-#include "po-charset.h"
-#include "msgl-iconv.h"
  #include "msgl-equal.h"
-#include "msgl-fsearch.h"
-#include "glthread/lock.h"
-#include "lang-table.h"
-#include "plural-exp.h"
-#include "plural-count.h"
-#include "msgl-check.h"
-#include "po-xerror.h"
+#include "msgl-merge.h"
  #include "xerror-handler.h"
-#include "xvasprintf.h"
  #include "backupfile.h"
  #include "copy-file.h"
  #include "propername.h"
@@ -74,67 +58,20 @@
  
  #define _(str) gettext (str)
  
-#define obstack_chunk_alloc xmalloc
-#define obstack_chunk_free free
-
-
-/* If true do not print unneeded messages.  */
-static bool quiet;
-
-/* Verbosity level.  */
-static int verbosity_level;
  
  /* Force output of PO file even if empty.  */
  static int force_po;
  
-/* Apply the .pot file to each of the domains in the PO file.  */
-static bool multi_domain_mode = false;
-
-/* Produce output for msgfmt, not for a translator.
-   msgfmt ignores
-     - untranslated messages,
-     - fuzzy messages, except the header entry,
-     - obsolete messages.
-   Therefore output for msgfmt does not need to include such messages.  */
-static bool for_msgfmt = false;
-
-/* Determines whether to use fuzzy matching.  */
-static bool use_fuzzy_matching = true;
-
-/* Determines whether to keep old msgids as previous msgids.  */
-static bool keep_previous = false;
-
-/* Language (ISO-639 code) and optional territory (ISO-3166 code).  */
-static const char *catalogname = NULL;
-
-/* List of user-specified compendiums.  */
-static message_list_list_ty *compendiums;
-
-/* List of corresponding filenames.  */
-static string_list_ty *compendium_filenames;
-
  /* Update mode.  */
  static bool update_mode = false;
  static const char *version_control_string;
  static const char *backup_suffix_string;
  
  
-struct statistics
-{
-  size_t merged;
-  size_t fuzzied;
-  size_t missing;
-  size_t obsolete;
-};
-
-
  /* Forward declaration of local functions.  */
  _GL_NORETURN_FUNC static void usage (int status);
  static void compendium (const char *filename);
  static void msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp);
-static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
-                                 catalog_input_format_ty input_syntax,
-                                 msgdomain_list_ty **defp);
  
  
  int
@@ -756,1414 +693,3 @@ msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp)
          }
      }
  }
-
-
-/* Data structure representing the messages with known translations.
-   They are composed of
-     - A message list from def.po,
-     - The compendiums.
-   The data structure is optimized for exact and fuzzy searches.  */
-typedef struct definitions_ty definitions_ty;
-struct definitions_ty
-{
-  /* A list of message lists.  The first comes from def.po, the other ones
-     from the compendiums.  Each message list has a built-in hash table,
-     for speed when doing the exact searches.  */
-  message_list_list_ty *lists;
-
-  /* A fuzzy index of the current list of non-compendium messages, for speed
-     when doing fuzzy searches.  Used only if use_fuzzy_matching is true.  */
-  message_fuzzy_index_ty *curr_findex;
-  /* A once-only execution guard for the initialization of the fuzzy index.
-     Needed for OpenMP.  */
-  gl_lock_define(, curr_findex_init_lock)
-
-  /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
-     Used only if use_fuzzy_matching is true and compendiums != NULL.  */
-  message_fuzzy_index_ty *comp_findex;
-  /* A once-only execution guard for the initialization of the fuzzy index.
-     Needed for OpenMP.  */
-  gl_lock_define(, comp_findex_init_lock)
-
-  /* The canonical encoding of the definitions and the compendiums.
-     Only used for fuzzy matching.  */
-  const char *canon_charset;
-};
-
-static inline void
-definitions_init (definitions_ty *definitions, const char *canon_charset)
-{
-  definitions->lists = message_list_list_alloc ();
-  message_list_list_append (definitions->lists, NULL);
-  if (compendiums != NULL)
-    message_list_list_append_list (definitions->lists, compendiums);
-  definitions->curr_findex = NULL;
-  gl_lock_init (definitions->curr_findex_init_lock);
-  definitions->comp_findex = NULL;
-  gl_lock_init (definitions->comp_findex_init_lock);
-  definitions->canon_charset = canon_charset;
-}
-
-/* Return the current list of non-compendium messages.  */
-static inline message_list_ty *
-definitions_current_list (const definitions_ty *definitions)
-{
-  return definitions->lists->item[0];
-}
-
-/* Set the current list of non-compendium messages.  */
-static inline void
-definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
-{
-  definitions->lists->item[0] = mlp;
-  if (definitions->curr_findex != NULL)
-    {
-      message_fuzzy_index_free (definitions->curr_findex);
-      definitions->curr_findex = NULL;
-    }
-}
-
-/* Create the fuzzy index for the current list of non-compendium messages.
-   Used only if use_fuzzy_matching is true.  */
-static inline void
-definitions_init_curr_findex (definitions_ty *definitions)
-{
-  /* Protect against concurrent execution.  */
-  gl_lock_lock (definitions->curr_findex_init_lock);
-  if (definitions->curr_findex == NULL)
-    definitions->curr_findex =
-      message_fuzzy_index_alloc (definitions_current_list (definitions),
-                                 definitions->canon_charset);
-  gl_lock_unlock (definitions->curr_findex_init_lock);
-}
-
-/* Create the fuzzy index for the compendium messages.
-   Used only if use_fuzzy_matching is true and compendiums != NULL.  */
-static inline void
-definitions_init_comp_findex (definitions_ty *definitions)
-{
-  /* Protect against concurrent execution.  */
-  gl_lock_lock (definitions->comp_findex_init_lock);
-  if (definitions->comp_findex == NULL)
-    {
-      /* Combine all the compendium message lists into a single one.  Don't
-         bother checking for duplicates.  */
-      message_list_ty *all_compendium;
-      size_t i;
-
-      all_compendium = message_list_alloc (false);
-      for (i = 0; i < compendiums->nitems; i++)
-        {
-          message_list_ty *mlp = compendiums->item[i];
-          size_t j;
-
-          for (j = 0; j < mlp->nitems; j++)
-            message_list_append (all_compendium, mlp->item[j]);
-        }
-
-      /* Create the fuzzy index from it.  */
-      definitions->comp_findex =
-        message_fuzzy_index_alloc (all_compendium, definitions->canon_charset);
-    }
-  gl_lock_unlock (definitions->comp_findex_init_lock);
-}
-
-/* Exact search.  */
-static inline message_ty *
-definitions_search (const definitions_ty *definitions,
-                    const char *msgctxt, const char *msgid)
-{
-  return message_list_list_search (definitions->lists, msgctxt, msgid);
-}
-
-/* Fuzzy search.
-   Used only if use_fuzzy_matching is true.  */
-static inline message_ty *
-definitions_search_fuzzy (definitions_ty *definitions,
-                          const char *msgctxt, const char *msgid)
-{
-  message_ty *mp1;
-
-  if (false)
-    {
-      /* Old, slow code.  */
-      mp1 =
-        message_list_search_fuzzy (definitions_current_list (definitions),
-                                   msgctxt, msgid);
-    }
-  else
-    {
-      /* Speedup through early abort in fstrcmp(), combined with pre-sorting
-         of the messages through a hashed index.  */
-      /* Create the fuzzy index lazily.  */
-      if (definitions->curr_findex == NULL)
-        definitions_init_curr_findex (definitions);
-      mp1 = message_fuzzy_index_search (definitions->curr_findex,
-                                        msgctxt, msgid,
-                                        FUZZY_THRESHOLD, false);
-    }
-
-  if (compendiums != NULL)
-    {
-      double lower_bound_for_mp2;
-      message_ty *mp2;
-
-      lower_bound_for_mp2 =
-        (mp1 != NULL
-         ? fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)
-         : FUZZY_THRESHOLD);
-      /* This lower bound must be >= FUZZY_THRESHOLD.  */
-      if (!(lower_bound_for_mp2 >= FUZZY_THRESHOLD))
-        abort ();
-
-      /* Create the fuzzy index lazily.  */
-      if (definitions->comp_findex == NULL)
-        definitions_init_comp_findex (definitions);
-
-      mp2 = message_fuzzy_index_search (definitions->comp_findex,
-                                        msgctxt, msgid,
-                                        lower_bound_for_mp2, true);
-
-      /* Choose the best among mp1, mp2.  */
-      if (mp1 == NULL
-          || (mp2 != NULL
-              && (fuzzy_search_goal_function (mp2, msgctxt, msgid,
-                                              lower_bound_for_mp2)
-                  > lower_bound_for_mp2)))
-        mp1 = mp2;
-    }
-
-  return mp1;
-}
-
-static inline void
-definitions_destroy (definitions_ty *definitions)
-{
-  message_list_list_free (definitions->lists, 2);
-  if (definitions->curr_findex != NULL)
-    message_fuzzy_index_free (definitions->curr_findex);
-  if (definitions->comp_findex != NULL)
-    message_fuzzy_index_free (definitions->comp_findex);
-}
-
-
-/* A silent error logger.  We are only interested in knowing whether errors
-   occurred at all.  */
-static void
-silent_error_logger (void *data, const char *format, ...)
-#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
-     __attribute__ ((__format__ (__printf__, 2, 3)))
-#endif
-;
-static void
-silent_error_logger (void *data, const char *format, ...)
-{
-}
-
-
-/* Another silent error logger.  */
-static void
-silent_xerror (int severity,
-               const struct message_ty *message,
-               const char *filename, size_t lineno, size_t column,
-               int multiline_p, const char *message_text)
-{
-}
-static void
-silent_xerror2 (int severity,
-                const struct message_ty *message1,
-                const char *filename1, size_t lineno1, size_t column1,
-                int multiline_p1, const char *message_text1,
-                const struct message_ty *message2,
-                const char *filename2, size_t lineno2, size_t column2,
-                int multiline_p2, const char *message_text2)
-{
-}
-
-static message_ty *
-message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
-               const struct plural_distribution *distribution)
-{
-  const char *msgstr;
-  size_t msgstr_len;
-  const char *prev_msgctxt;
-  const char *prev_msgid;
-  const char *prev_msgid_plural;
-  message_ty *result;
-  size_t j;
-
-  /* Take the msgid from the reference.  When fuzzy matches are made,
-     the definition will not be unique, but the reference will be -
-     usually because it has only been slightly changed.  */
-
-  /* Take the msgstr from the definition.  The msgstr of the reference
-     is usually empty, as it was generated by xgettext.  If we currently
-     process the header entry we have to merge the msgstr by using the
-     Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
-  if (is_header (ref))
-    {
-      /* Oh, oh.  The header entry and we have something to fill in.  */
-      static const struct
-      {
-        const char *name;
-        size_t len;
-      } known_fields[] =
-      {
-        { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
-#define PROJECT_ID              0
-        { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
-#define REPORT_MSGID_BUGS_TO    1
-        { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
-#define POT_CREATION_DATE       2
-        { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
-#define PO_REVISION_DATE        3
-        { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
-#define LAST_TRANSLATOR         4
-        { "Language-Team:", sizeof ("Language-Team:") - 1 },
-#define LANGUAGE_TEAM           5
-        { "Language:", sizeof ("Language:") - 1 },
-#define LANGUAGE                6
-        { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
-#define MIME_VERSION            7
-        { "Content-Type:", sizeof ("Content-Type:") - 1 },
-#define CONTENT_TYPE            8
-        { "Content-Transfer-Encoding:",
-          sizeof ("Content-Transfer-Encoding:") - 1 }
-#define CONTENT_TRANSFER        9
-      };
-#define UNKNOWN 10
-      struct
-      {
-        const char *string;
-        size_t len;
-      } header_fields[UNKNOWN + 1];
-      struct obstack pool;
-      const char *cp;
-      char *newp;
-
-      /* Clear all fields.  */
-      memset (header_fields, '\0', sizeof (header_fields));
-
-      /* Prepare a temporary memory pool.  */
-      obstack_init (&pool);
-
-      cp = def->msgstr;
-      while (*cp != '\0')
-        {
-          const char *endp = strchr (cp, '\n');
-          int terminated = endp != NULL;
-          size_t len;
-          size_t cnt;
-
-          if (!terminated)
-            {
-              /* Add a trailing newline.  */
-              char *copy;
-              endp = strchr (cp, '\0');
-
-              len = endp - cp + 1;
-
-              copy = (char *) obstack_alloc (&pool, len + 1);
-              stpcpy (stpcpy (copy, cp), "\n");
-              cp = copy;
-            }
-          else
-            {
-              len = (endp - cp) + 1;
-              ++endp;
-            }
-
-          /* Compare with any of the known fields.  */
-          for (cnt = 0;
-               cnt < sizeof (known_fields) / sizeof (known_fields[0]);
-               ++cnt)
-            if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
-                == 0)
-              break;
-
-          if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
-            {
-              header_fields[cnt].string = &cp[known_fields[cnt].len];
-              header_fields[cnt].len = len - known_fields[cnt].len;
-            }
-          else
-            {
-              /* It's an unknown field.  Append content to what is already
-                 known.  */
-              char *extended =
-                (char *) obstack_alloc (&pool,
-                                        header_fields[UNKNOWN].len + len + 1);
-              if (header_fields[UNKNOWN].string)
-                memcpy (extended, header_fields[UNKNOWN].string,
-                        header_fields[UNKNOWN].len);
-              memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
-              extended[header_fields[UNKNOWN].len + len] = '\0';
-              header_fields[UNKNOWN].string = extended;
-              header_fields[UNKNOWN].len += len;
-            }
-
-          cp = endp;
-        }
-
-      /* Set the Language field if specified on the command line.  */
-      if (catalogname != NULL)
-        {
-          /* Prepend a space and append a newline.  */
-          size_t len = strlen (catalogname);
-          char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
-          stpcpy (stpcpy (stpcpy (copy, " "), catalogname), "\n");
-          header_fields[LANGUAGE].string = copy;
-          header_fields[LANGUAGE].len = strlen (header_fields[LANGUAGE].string);
-        }
-      /* Add a Language field to PO files that don't have one.  The Language
-         field was introduced in gettext-0.18.  */
-      else if (header_fields[LANGUAGE].string == NULL)
-        {
-          const char *language_team_ptr = header_fields[LANGUAGE_TEAM].string;
-
-          if (language_team_ptr != NULL)
-            {
-              size_t language_team_len = header_fields[LANGUAGE_TEAM].len;
-
-              /* Trim leading blanks.  */
-              while (language_team_len > 0
-                     && (*language_team_ptr == ' '
-                         || *language_team_ptr == '\t'))
-                {
-                  language_team_ptr++;
-                  language_team_len--;
-                }
-
-              /* Trim trailing blanks.  */
-              while (language_team_len > 0
-                     && (language_team_ptr[language_team_len - 1] == ' '
-                         || language_team_ptr[language_team_len - 1] == '\t'))
-                language_team_len--;
-
-              /* Trim last word, if it looks like an URL or email address.  */
-              {
-                size_t i;
-
-                for (i = language_team_len; i > 0; i--)
-                  if (language_team_ptr[i - 1] == ' '
-                      || language_team_ptr[i - 1] == '\t')
-                    break;
-                /* The last word: language_team_ptr[i..language_team_len-1].  */
-                if (i < language_team_len
-                    && (language_team_ptr[i] == '<'
-                        || language_team_ptr[language_team_len - 1] == '>'
-                        || memchr (language_team_ptr, '@', language_team_len)
-                           != NULL
-                        || memchr (language_team_ptr, '/', language_team_len)
-                           != NULL))
-                  {
-                    /* Trim last word and blanks before it.  */
-                    while (i > 0
-                           && (language_team_ptr[i - 1] == ' '
-                               || language_team_ptr[i - 1] == '\t'))
-                      i--;
-                    language_team_len = i;
-                  }
-              }
-
-              /* The rest of the Language-Team field should be the english name
-                 of the language.  Convert to ISO 639 and ISO 3166 syntax.  */
-              {
-                size_t i;
-
-                for (i = 0; i < language_variant_table_size; i++)
-                  if (strlen (language_variant_table[i].english)
-                      == language_team_len
-                      && memcmp (language_variant_table[i].english,
-                                 language_team_ptr, language_team_len) == 0)
-                    {
-                      header_fields[LANGUAGE].string =
-                        language_variant_table[i].code;
-                      break;
-                    }
-              }
-              if (header_fields[LANGUAGE].string == NULL)
-                {
-                  size_t i;
-
-                  for (i = 0; i < language_table_size; i++)
-                    if (strlen (language_table[i].english) == language_team_len
-                        && memcmp (language_table[i].english,
-                                   language_team_ptr, language_team_len) == 0)
-                      {
-                        header_fields[LANGUAGE].string = language_table[i].code;
-                        break;
-                      }
-                }
-              if (header_fields[LANGUAGE].string != NULL)
-                {
-                  /* Prepend a space and append a newline.  */
-                  const char *str = header_fields[LANGUAGE].string;
-                  size_t len = strlen (str);
-                  char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
-                  stpcpy (stpcpy (stpcpy (copy, " "), str), "\n");
-                  header_fields[LANGUAGE].string = copy;
-                }
-              else
-                header_fields[LANGUAGE].string = " \n";
-              header_fields[LANGUAGE].len =
-                strlen (header_fields[LANGUAGE].string);
-            }
-        }
-
-      {
-        const char *msgid_bugs_ptr;
-
-        msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
-        if (msgid_bugs_ptr != NULL)
-          {
-            size_t msgid_bugs_len;
-            const char *endp;
-
-            msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
-
-            endp = strchr (msgid_bugs_ptr, '\n');
-            if (endp == NULL)
-              {
-                /* Add a trailing newline.  */
-                char *extended;
-                endp = strchr (msgid_bugs_ptr, '\0');
-                msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
-                extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
-                stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
-                msgid_bugs_ptr = extended;
-              }
-            else
-              msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
-
-            header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
-            header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
-          }
-      }
-
-      {
-        const char *pot_date_ptr;
-
-        pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
-        if (pot_date_ptr != NULL)
-          {
-            size_t pot_date_len;
-            const char *endp;
-
-            pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
-
-            endp = strchr (pot_date_ptr, '\n');
-            if (endp == NULL)
-              {
-                /* Add a trailing newline.  */
-                char *extended;
-                endp = strchr (pot_date_ptr, '\0');
-                pot_date_len = (endp - pot_date_ptr) + 1;
-                extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
-                stpcpy (stpcpy (extended, pot_date_ptr), "\n");
-                pot_date_ptr = extended;
-              }
-            else
-              pot_date_len = (endp - pot_date_ptr) + 1;
-
-            header_fields[POT_CREATION_DATE].string = pot_date_ptr;
-            header_fields[POT_CREATION_DATE].len = pot_date_len;
-          }
-      }
-
-      /* Concatenate all the various fields.  */
-      {
-        size_t len;
-        size_t cnt;
-
-        len = 0;
-        for (cnt = 0; cnt < UNKNOWN; ++cnt)
-          if (header_fields[cnt].string != NULL)
-            len += known_fields[cnt].len + header_fields[cnt].len;
-        len += header_fields[UNKNOWN].len;
-
-        cp = newp = XNMALLOC (len + 1, char);
-        newp[len] = '\0';
-      }
-
-#define IF_FILLED(idx)                                                        \
-      if (header_fields[idx].string)                                          \
-        newp = stpncpy (stpcpy (newp, known_fields[idx].name),                \
-                        header_fields[idx].string, header_fields[idx].len)
-
-      IF_FILLED (PROJECT_ID);
-      IF_FILLED (REPORT_MSGID_BUGS_TO);
-      IF_FILLED (POT_CREATION_DATE);
-      IF_FILLED (PO_REVISION_DATE);
-      IF_FILLED (LAST_TRANSLATOR);
-      IF_FILLED (LANGUAGE_TEAM);
-      IF_FILLED (LANGUAGE);
-      IF_FILLED (MIME_VERSION);
-      IF_FILLED (CONTENT_TYPE);
-      IF_FILLED (CONTENT_TRANSFER);
-      if (header_fields[UNKNOWN].string != NULL)
-        stpcpy (newp, header_fields[UNKNOWN].string);
-
-#undef IF_FILLED
-
-      /* Free the temporary memory pool.  */
-      obstack_free (&pool, NULL);
-
-      msgstr = cp;
-      msgstr_len = strlen (cp) + 1;
-
-      prev_msgctxt = NULL;
-      prev_msgid = NULL;
-      prev_msgid_plural = NULL;
-    }
-  else
-    {
-      msgstr = def->msgstr;
-      msgstr_len = def->msgstr_len;
-
-      if (def->is_fuzzy)
-        {
-          prev_msgctxt = def->prev_msgctxt;
-          prev_msgid = def->prev_msgid;
-          prev_msgid_plural = def->prev_msgid_plural;
-        }
-      else
-        {
-          prev_msgctxt = def->msgctxt;
-          prev_msgid = def->msgid;
-          prev_msgid_plural = def->msgid_plural;
-        }
-    }
-
-  result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
-                          xstrdup (ref->msgid), ref->msgid_plural,
-                          msgstr, msgstr_len, &def->pos);
-
-  /* Take the comments from the definition file.  There will be none at
-     all in the reference file, as it was generated by xgettext.  */
-  if (def->comment)
-    for (j = 0; j < def->comment->nitems; ++j)
-      message_comment_append (result, def->comment->item[j]);
-
-  /* Take the dot comments from the reference file, as they are
-     generated by xgettext.  Any in the definition file are old ones
-     collected by previous runs of xgettext and msgmerge.  */
-  if (ref->comment_dot)
-    for (j = 0; j < ref->comment_dot->nitems; ++j)
-      message_comment_dot_append (result, ref->comment_dot->item[j]);
-
-  /* The flags are mixed in a special way.  Some informations come
-     from the reference message (such as format/no-format), others
-     come from the definition file (fuzzy or not).  */
-  result->is_fuzzy = def->is_fuzzy | force_fuzzy;
-
-  /* If ref and def have the same msgid but different msgid_plural, it's
-     a reason to mark the result fuzzy.  */
-  if (!result->is_fuzzy
-      && (ref->msgid_plural != NULL
-          ? def->msgid_plural == NULL
-            || strcmp (ref->msgid_plural, def->msgid_plural) != 0
-          : def->msgid_plural != NULL))
-    result->is_fuzzy = true;
-
-  {
-    size_t i;
-
-    for (i = 0; i < NFORMATS; i++)
-      {
-        result->is_format[i] = ref->is_format[i];
-
-        /* If the reference message is marked as being a format specifier,
-           but the definition message is not, we check if the resulting
-           message would pass "msgfmt -c".  If yes, then all is fine.  If
-           not, we add a fuzzy marker, because
-           1. the message needs the translator's attention,
-           2. msgmerge must not transform a PO file which passes "msgfmt -c"
-              into a PO file which doesn't.  */
-        if (!result->is_fuzzy
-            && possible_format_p (ref->is_format[i])
-            && !possible_format_p (def->is_format[i])
-            && check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural,
-                                            msgstr, msgstr_len, i, ref->range,
-                                            distribution,
-                                            silent_error_logger, NULL)
-               > 0)
-          result->is_fuzzy = true;
-      }
-  }
-
-  result->range = ref->range;
-  /* If the definition message was assuming a certain range, but the reference
-     message does not specify a range any more or specifies a range that is
-     not the same or a subset, we add a fuzzy marker, because
-       1. the message needs the translator's attention,
-       2. msgmerge must not transform a PO file which passes "msgfmt -c"
-          into a PO file which doesn't.  */
-  if (!result->is_fuzzy
-      && has_range_p (def->range)
-      && !(has_range_p (ref->range)
-           && ref->range.min >= def->range.min
-           && ref->range.max <= def->range.max))
-    result->is_fuzzy = true;
-
-  result->do_wrap = ref->do_wrap;
-
-  /* Insert previous msgid, commented out with "#|".
-     Do so only when --previous is specified, for backward compatibility.
-     Since the "previous msgid" represents the original msgid that led to
-     the current msgstr,
-       - we can omit it if the resulting message is not fuzzy or is
-         untranslated (but do this in a later pass, since result->is_fuzzy
-         is not finalized at this point),
-       - otherwise, if the corresponding message from the definition file
-         was translated (not fuzzy), we use that message's msgid,
-       - otherwise, we use that message's prev_msgid.  */
-  if (keep_previous)
-    {
-      result->prev_msgctxt = prev_msgctxt;
-      result->prev_msgid = prev_msgid;
-      result->prev_msgid_plural = prev_msgid_plural;
-    }
-
-  /* If the reference message was obsolete, make the resulting message
-     obsolete.  This case doesn't occur for POT files, but users sometimes
-     use PO files that are themselves the result of msgmerge instead of POT
-     files.  */
-  result->obsolete = ref->obsolete;
-
-  /* Take the file position comments from the reference file, as they
-     are generated by xgettext.  Any in the definition file are old ones
-     collected by previous runs of xgettext and msgmerge.  */
-  for (j = 0; j < ref->filepos_count; ++j)
-    {
-      lex_pos_ty *pp = &ref->filepos[j];
-      message_comment_filepos (result, pp->file_name, pp->line_number);
-    }
-
-  /* Special postprocessing is needed if the reference message is a
-     plural form and the definition message isn't, or vice versa.  */
-  if (ref->msgid_plural != NULL)
-    {
-      if (def->msgid_plural == NULL)
-        result->used = 1;
-    }
-  else
-    {
-      if (def->msgid_plural != NULL)
-        result->used = 2;
-    }
-
-  /* All done, return the merged message to the caller.  */
-  return result;
-}
-
-
-#define DOT_FREQUENCY 10
-
-static void
-match_domain (const char *fn1, const char *fn2,
-              definitions_ty *definitions, message_list_ty *refmlp,
-              message_list_ty *resultmlp,
-              struct statistics *stats, unsigned int *processed)
-{
-  {
-    unsigned long int nplurals;
-    char *untranslated_plural_msgstr;
-    struct plural_distribution distribution;
-    struct search_result { message_ty *found; bool fuzzy; } *search_results;
-    size_t j;
-
-    {
-      message_ty *header_entry;
-      const struct expression *plural_expr;
-
-      header_entry =
-        message_list_search (definitions_current_list (definitions), NULL, "");
-      extract_plural_expression (header_entry ? header_entry->msgstr : NULL,
-                                 &plural_expr, &nplurals);
-      untranslated_plural_msgstr = XNMALLOC (nplurals, char);
-      memset (untranslated_plural_msgstr, '\0', nplurals);
-
-      /* Determine the plural distribution of the plural_expr formula.  */
-      {
-        /* Disable error output temporarily.  */
-        unsigned int error_count = 0;
-        struct xerror_handler local_xerror_handler =
-          {
-            silent_xerror,
-            silent_xerror2,
-            &error_count
-          };
-
-        if (check_plural_eval (plural_expr, nplurals, header_entry,
-                               &distribution, &local_xerror_handler) > 0)
-          {
-            distribution.expr = NULL;
-            distribution.often = NULL;
-            distribution.often_length = 0;
-            distribution.histogram = NULL;
-          }
-      }
-    }
-
-    /* Most of the time is spent in definitions_search_fuzzy.
-       Perform it in a separate loop that can be parallelized by an OpenMP
-       capable compiler.  */
-    search_results = XNMALLOC (refmlp->nitems, struct search_result);
-    {
-      long int nn = refmlp->nitems;
-      long int jj;
-
-      /* Tell the OpenMP capable compiler to distribute this loop across
-         several threads.  The schedule is dynamic, because for some messages
-         the loop body can be executed very quickly, whereas for others it takes
-         a long time.
-         Note: The Sun Workshop 6.2 C compiler does not allow a space between
-         '#' and 'pragma'.  */
-      #ifdef _OPENMP
-       #pragma omp parallel for schedule(dynamic)
-      #endif
-      for (jj = 0; jj < nn; jj++)
-        {
-          message_ty *refmsg = refmlp->item[jj];
-          message_ty *defmsg;
-
-          /* Because merging can take a while we print something to signal
-             we are not dead.  */
-          if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
-            fputc ('.', stderr);
-          #ifdef _OPENMP
-           #pragma omp atomic
-          #endif
-          (*processed)++;
-
-          /* See if it is in the other file.  */
-          defmsg =
-            definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
-          if (defmsg != NULL)
-            {
-              search_results[jj].found = defmsg;
-              search_results[jj].fuzzy = false;
-            }
-          else if (!is_header (refmsg)
-                   /* If the message was not defined at all, try to find a very
-                      similar message, it could be a typo, or the suggestion may
-                      help.  */
-                   && use_fuzzy_matching
-                   && ((defmsg =
-                          definitions_search_fuzzy (definitions,
-                                                    refmsg->msgctxt,
-                                                    refmsg->msgid)) != NULL))
-            {
-              search_results[jj].found = defmsg;
-              search_results[jj].fuzzy = true;
-            }
-          else
-            search_results[jj].found = NULL;
-        }
-    }
-
-    for (j = 0; j < refmlp->nitems; j++)
-      {
-        message_ty *refmsg = refmlp->item[j];
-
-        /* See if it is in the other file.
-           This used definitions_search.  */
-        if (search_results[j].found != NULL && !search_results[j].fuzzy)
-          {
-            message_ty *defmsg = search_results[j].found;
-            /* Merge the reference with the definition: take the #. and
-               #: comments from the reference, take the # comments from
-               the definition, take the msgstr from the definition.  Add
-               this merged entry to the output message list.  */
-            message_ty *mp =
-              message_merge (defmsg, refmsg, false, &distribution);
-
-            /* When producing output for msgfmt, omit messages that are
-               untranslated or fuzzy (except the header entry).  */
-            if (!(for_msgfmt
-                  && (mp->msgstr[0] == '\0' /* untranslated? */
-                      || (mp->is_fuzzy && !is_header (mp))))) /* fuzzy? */
-              {
-                message_list_append (resultmlp, mp);
-
-                /* Remember that this message has been used, when we scan
-                   later to see if anything was omitted.  */
-                defmsg->used = 1;
-              }
-
-            stats->merged++;
-          }
-        else if (!is_header (refmsg))
-          {
-            /* If the message was not defined at all, try to find a very
-               similar message, it could be a typo, or the suggestion may
-               help.  This search assumed use_fuzzy_matching and used
-               definitions_search_fuzzy.  */
-            if (search_results[j].found != NULL && search_results[j].fuzzy)
-              {
-                message_ty *defmsg = search_results[j].found;
-                message_ty *mp;
-
-                if (verbosity_level > 1)
-                  po_xerror2 (PO_SEVERITY_ERROR,
-                              refmsg, NULL, 0, 0, false,
-                              _("this message is used but not defined"),
-                              defmsg, NULL, 0, 0, false,
-                              _("but this definition is similar"));
-
-                /* Merge the reference with the definition: take the #. and
-                   #: comments from the reference, take the # comments from
-                   the definition, take the msgstr from the definition.  Add
-                   this merged entry to the output message list.  */
-                mp = message_merge (defmsg, refmsg, true, &distribution);
-
-                message_list_append (resultmlp, mp);
-
-                /* Remember that this message has been used, when we scan
-                   later to see if anything was omitted.  */
-                defmsg->used = 1;
-
-                stats->fuzzied++;
-                if (!quiet && verbosity_level <= 1)
-                  /* Always print a dot if we handled a fuzzy match.  */
-                  fputc ('.', stderr);
-              }
-            else
-              {
-                message_ty *mp;
-                bool is_untranslated;
-                const char *p;
-                const char *pend;
-
-                if (verbosity_level > 1)
-                  po_xerror (PO_SEVERITY_ERROR, refmsg, NULL, 0, 0, false,
-                             xasprintf (
-                               _("this message is used but not defined in %s"),
-                               fn1));
-
-                mp = message_copy (refmsg);
-
-                /* Test if mp is untranslated.  (It most likely is.)  */
-                is_untranslated = true;
-                for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
-                  if (*p != '\0')
-                    {
-                      is_untranslated = false;
-                      break;
-                    }
-
-                if (mp->msgid_plural != NULL && is_untranslated)
-                  {
-                    /* Change mp->msgstr_len consecutive empty strings into
-                       nplurals consecutive empty strings.  */
-                    if (nplurals > mp->msgstr_len)
-                      mp->msgstr = untranslated_plural_msgstr;
-                    mp->msgstr_len = nplurals;
-                  }
-
-                /* When producing output for msgfmt, omit messages that are
-                   untranslated or fuzzy (except the header entry).  */
-                if (!(for_msgfmt && (is_untranslated || mp->is_fuzzy)))
-                  {
-                    message_list_append (resultmlp, mp);
-                  }
-
-                stats->missing++;
-              }
-          }
-      }
-
-    free (search_results);
-  }
-
-  /* Now postprocess the problematic merges.  This is needed because we
-     want the result to pass the "msgfmt -c -v" check.  */
-  {
-    /* message_merge sets mp->used to 1 or 2, depending on the problem.
-       Compute the bitwise OR of all these.  */
-    int problematic = 0;
-    size_t j;
-
-    for (j = 0; j < resultmlp->nitems; j++)
-      problematic |= resultmlp->item[j]->used;
-
-    if (problematic)
-      {
-        unsigned long int nplurals = 0;
-
-        if (problematic & 1)
-          {
-            /* Need to know nplurals of the result domain.  */
-            message_ty *header_entry =
-              message_list_search (resultmlp, NULL, "");
-
-            nplurals = get_plural_count (header_entry
-                                         ? header_entry->msgstr
-                                         : NULL);
-          }
-
-        for (j = 0; j < resultmlp->nitems; j++)
-          {
-            message_ty *mp = resultmlp->item[j];
-
-            if ((mp->used & 1) && (nplurals > 0))
-              {
-                /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
-                   Use a copy of def->msgstr for each possible plural form.  */
-                size_t new_msgstr_len;
-                char *new_msgstr;
-                char *p;
-                unsigned long i;
-
-                if (verbosity_level > 1)
-                  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
-                             _("this message should define plural forms"));
-
-                new_msgstr_len = nplurals * mp->msgstr_len;
-                new_msgstr = XNMALLOC (new_msgstr_len, char);
-                for (i = 0, p = new_msgstr; i < nplurals; i++)
-                  {
-                    memcpy (p, mp->msgstr, mp->msgstr_len);
-                    p += mp->msgstr_len;
-                  }
-                mp->msgstr = new_msgstr;
-                mp->msgstr_len = new_msgstr_len;
-                mp->is_fuzzy = true;
-              }
-
-            if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
-              {
-                /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
-                   Use only the first among the plural forms.  */
-
-                if (verbosity_level > 1)
-                  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
-                             _("this message should not define plural forms"));
-
-                mp->msgstr_len = strlen (mp->msgstr) + 1;
-                mp->is_fuzzy = true;
-              }
-
-            /* Postprocessing of this message is done.  */
-            mp->used = 0;
-          }
-      }
-  }
-
-  /* Now that mp->is_fuzzy is finalized for all messages, remove the
-     "previous msgid" information from all messages that are not fuzzy or
-     are untranslated.  */
-  {
-    size_t j;
-
-    for (j = 0; j < resultmlp->nitems; j++)
-      {
-        message_ty *mp = resultmlp->item[j];
-
-        if (!mp->is_fuzzy || mp->msgstr[0] == '\0')
-          {
-            mp->prev_msgctxt = NULL;
-            mp->prev_msgid = NULL;
-            mp->prev_msgid_plural = NULL;
-          }
-      }
-  }
-}
-
-static msgdomain_list_ty *
-merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
-       msgdomain_list_ty **defp)
-{
-  msgdomain_list_ty *def;
-  msgdomain_list_ty *ref;
-  size_t j, k;
-  unsigned int processed;
-  struct statistics stats;
-  msgdomain_list_ty *result;
-  const char *def_canon_charset;
-  definitions_ty definitions;
-  message_list_ty *empty_list;
-
-  stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
-
-  /* This is the definitions file, created by a human.  */
-  def = read_catalog_file (fn1, input_syntax);
-
-  /* This is the references file, created by groping the sources with
-     the xgettext program.  */
-  ref = read_catalog_file (fn2, input_syntax);
-  check_pot_charset (ref, fn2);
-  /* Add a dummy header entry, if the references file contains none.  */
-  for (k = 0; k < ref->nitems; k++)
-    if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
-      {
-        static lex_pos_ty pos = { __FILE__, __LINE__ };
-        message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
-
-        message_list_prepend (ref->item[k]->messages, refheader);
-      }
-
-  /* The references file can be either in ASCII or in UTF-8.  If it is
-     in UTF-8, we have to convert the definitions and the compendiums to
-     UTF-8 as well.  */
-  {
-    bool was_utf8 = false;
-    for (k = 0; k < ref->nitems; k++)
-      {
-        message_list_ty *mlp = ref->item[k]->messages;
-
-        for (j = 0; j < mlp->nitems; j++)
-          if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
-            {
-              const char *header = mlp->item[j]->msgstr;
-
-              if (header != NULL)
-                {
-                  const char *charsetstr = c_strstr (header, "charset=");
-
-                  if (charsetstr != NULL)
-                    {
-                      size_t len;
-
-                      charsetstr += strlen ("charset=");
-                      len = strcspn (charsetstr, " \t\n");
-                      if (len == strlen ("UTF-8")
-                          && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
-                        was_utf8 = true;
-                    }
-                }
-            }
-        }
-    if (was_utf8)
-      {
-        def = iconv_msgdomain_list (def, po_charset_utf8, true, fn1,
-                                    textmode_xerror_handler);
-        if (compendiums != NULL)
-          for (k = 0; k < compendiums->nitems; k++)
-            iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
-                                compendium_filenames->item[k],
-                                textmode_xerror_handler);
-      }
-    else if (compendiums != NULL && compendiums->nitems > 0)
-      {
-        /* Ensure that the definitions and the compendiums are in the same
-           encoding.  Prefer the encoding of the definitions file, if
-           possible; otherwise, if the definitions file is empty and the
-           compendiums are all in the same encoding, use that encoding;
-           otherwise, use UTF-8.  */
-        bool conversion_done = false;
-        {
-          char *charset = NULL;
-
-          /* Get the encoding of the definitions file.  */
-          for (k = 0; k < def->nitems; k++)
-            {
-              message_list_ty *mlp = def->item[k]->messages;
-
-              for (j = 0; j < mlp->nitems; j++)
-                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
-                  {
-                    const char *header = mlp->item[j]->msgstr;
-
-                    if (header != NULL)
-                      {
-                        const char *charsetstr = c_strstr (header, "charset=");
-
-                        if (charsetstr != NULL)
-                          {
-                            size_t len;
-
-                            charsetstr += strlen ("charset=");
-                            len = strcspn (charsetstr, " \t\n");
-                            charset = (char *) xmalloca (len + 1);
-                            memcpy (charset, charsetstr, len);
-                            charset[len] = '\0';
-                            break;
-                          }
-                      }
-                  }
-              if (charset != NULL)
-                break;
-            }
-          if (charset != NULL)
-            {
-              const char *canon_charset = po_charset_canonicalize (charset);
-
-              if (canon_charset != NULL)
-                {
-                  bool all_compendiums_iconvable = true;
-
-                  if (compendiums != NULL)
-                    for (k = 0; k < compendiums->nitems; k++)
-                      if (!is_message_list_iconvable (compendiums->item[k],
-                                                      NULL, canon_charset))
-                        {
-                          all_compendiums_iconvable = false;
-                          break;
-                        }
-
-                  if (all_compendiums_iconvable)
-                    {
-                      /* Convert the compendiums to def's encoding.  */
-                      if (compendiums != NULL)
-                        for (k = 0; k < compendiums->nitems; k++)
-                          iconv_message_list (compendiums->item[k],
-                                              NULL, canon_charset,
-                                              compendium_filenames->item[k],
-                                              textmode_xerror_handler);
-                      conversion_done = true;
-                    }
-                }
-              freea (charset);
-            }
-        }
-        if (!conversion_done)
-          {
-            if (def->nitems == 0
-                || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
-              {
-                /* The definitions file is empty.
-                   Compare the encodings of the compendiums.  */
-                const char *common_canon_charset = NULL;
-
-                for (k = 0; k < compendiums->nitems; k++)
-                  {
-                    message_list_ty *mlp = compendiums->item[k];
-                    char *charset = NULL;
-                    const char *canon_charset = NULL;
-
-                    for (j = 0; j < mlp->nitems; j++)
-                      if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
-                        {
-                          const char *header = mlp->item[j]->msgstr;
-
-                          if (header != NULL)
-                            {
-                              const char *charsetstr =
-                                c_strstr (header, "charset=");
-
-                              if (charsetstr != NULL)
-                                {
-                                  size_t len;
-
-                                  charsetstr += strlen ("charset=");
-                                  len = strcspn (charsetstr, " \t\n");
-                                  charset = (char *) xmalloca (len + 1);
-                                  memcpy (charset, charsetstr, len);
-                                  charset[len] = '\0';
-
-                                  break;
-                                }
-                            }
-                        }
-                    if (charset != NULL)
-                      {
-                        canon_charset = po_charset_canonicalize (charset);
-                        freea (charset);
-                      }
-                    /* If no charset declaration was found in this file,
-                       or if it is not a valid encoding name, or if it
-                       differs from the common charset found so far,
-                       we have no common charset.  */
-                    if (canon_charset == NULL
-                        || (common_canon_charset != NULL
-                            && canon_charset != common_canon_charset))
-                      {
-                        common_canon_charset = NULL;
-                        break;
-                      }
-                    common_canon_charset = canon_charset;
-                  }
-
-                if (common_canon_charset != NULL)
-                  /* No conversion needed in this case.  */
-                  conversion_done = true;
-              }
-            if (!conversion_done)
-              {
-                /* It's too hairy to find out what would be the optimal target
-                   encoding.  So, convert everything to UTF-8.  */
-                def = iconv_msgdomain_list (def, po_charset_utf8, true, fn1,
-                                            textmode_xerror_handler);
-                if (compendiums != NULL)
-                  for (k = 0; k < compendiums->nitems; k++)
-                    iconv_message_list (compendiums->item[k],
-                                        NULL, po_charset_utf8,
-                                        compendium_filenames->item[k],
-                                        textmode_xerror_handler);
-              }
-          }
-      }
-  }
-
-  /* Determine canonicalized encoding name of the definitions now, after
-     conversion.  Only used for fuzzy matching.  */
-  if (use_fuzzy_matching)
-    {
-      def_canon_charset = def->encoding;
-      if (def_canon_charset == NULL)
-        {
-          char *charset = NULL;
-
-          /* Get the encoding of the definitions file.  */
-          for (k = 0; k < def->nitems; k++)
-            {
-              message_list_ty *mlp = def->item[k]->messages;
-
-              for (j = 0; j < mlp->nitems; j++)
-                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
-                  {
-                    const char *header = mlp->item[j]->msgstr;
-
-                    if (header != NULL)
-                      {
-                        const char *charsetstr = c_strstr (header, "charset=");
-
-                        if (charsetstr != NULL)
-                          {
-                            size_t len;
-
-                            charsetstr += strlen ("charset=");
-                            len = strcspn (charsetstr, " \t\n");
-                            charset = (char *) xmalloca (len + 1);
-                            memcpy (charset, charsetstr, len);
-                            charset[len] = '\0';
-                            break;
-                          }
-                      }
-                  }
-              if (charset != NULL)
-                break;
-            }
-          if (charset != NULL)
-            def_canon_charset = po_charset_canonicalize (charset);
-          if (def_canon_charset == NULL)
-            /* Unspecified encoding.  Assume unibyte encoding.  */
-            def_canon_charset = po_charset_ascii;
-        }
-    }
-  else
-    def_canon_charset = NULL;
-
-  /* Initialize and preprocess the total set of message definitions.  */
-  definitions_init (&definitions, def_canon_charset);
-  empty_list = message_list_alloc (false);
-
-  result = msgdomain_list_alloc (false);
-  processed = 0;
-
-  /* Every reference must be matched with its definition. */
-  if (!multi_domain_mode)
-    for (k = 0; k < ref->nitems; k++)
-      {
-        const char *domain = ref->item[k]->domain;
-        message_list_ty *refmlp = ref->item[k]->messages;
-        message_list_ty *resultmlp =
-          msgdomain_list_sublist (result, domain, true);
-        message_list_ty *defmlp;
-
-        defmlp = msgdomain_list_sublist (def, domain, false);
-        if (defmlp == NULL)
-          defmlp = empty_list;
-        definitions_set_current_list (&definitions, defmlp);
-
-        match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
-                      &stats, &processed);
-      }
-  else
-    {
-      /* Apply the references messages in the default domain to each of
-         the definition domains.  */
-      message_list_ty *refmlp = ref->item[0]->messages;
-
-      for (k = 0; k < def->nitems; k++)
-        {
-          const char *domain = def->item[k]->domain;
-          message_list_ty *defmlp = def->item[k]->messages;
-
-          /* Ignore the default message domain if it has no messages.  */
-          if (k > 0 || defmlp->nitems > 0)
-            {
-              message_list_ty *resultmlp =
-                msgdomain_list_sublist (result, domain, true);
-
-              definitions_set_current_list (&definitions, defmlp);
-
-              match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
-                            &stats, &processed);
-            }
-        }
-    }
-
-  definitions_destroy (&definitions);
-
-  if (!for_msgfmt)
-    {
-      /* Look for messages in the definition file, which are not present
-         in the reference file, indicating messages which defined but not
-         used in the program.  Don't scan the compendium(s).  */
-      for (k = 0; k < def->nitems; ++k)
-        {
-          const char *domain = def->item[k]->domain;
-          message_list_ty *defmlp = def->item[k]->messages;
-
-          for (j = 0; j < defmlp->nitems; j++)
-            {
-              message_ty *defmsg = defmlp->item[j];
-
-              if (!defmsg->used)
-                {
-                  /* Remember the old translation although it is not used anymore.
-                     But we mark it as obsolete.  */
-                  message_ty *mp;
-
-                  mp = message_copy (defmsg);
-                  /* Clear the extracted comments.  */
-                  if (mp->comment_dot != NULL)
-                    {
-                      string_list_free (mp->comment_dot);
-                      mp->comment_dot = NULL;
-                    }
-                  /* Clear the file position comments.  */
-                  if (mp->filepos != NULL)
-                    {
-                      size_t i;
-
-                      for (i = 0; i < mp->filepos_count; i++)
-                        free ((char *) mp->filepos[i].file_name);
-                      mp->filepos_count = 0;
-                      free (mp->filepos);
-                      mp->filepos = NULL;
-                    }
-                  /* Mark as obsolete.   */
-                  mp->obsolete = true;
-
-                  message_list_append (msgdomain_list_sublist (result, domain, true),
-                                       mp);
-                  stats.obsolete++;
-                }
-            }
-        }
-    }
-
-  /* Determine the known a-priori encoding, if any.  */
-  if (def->encoding == ref->encoding)
-    result->encoding = def->encoding;
-
-  /* Report some statistics.  */
-  if (verbosity_level > 0)
-    fprintf (stderr, _("%s\
-Read %ld old + %ld reference, \
-merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
-             !quiet && verbosity_level <= 1 ? "\n" : "",
-             (long) def->nitems, (long) ref->nitems,
-             (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
-             (long) stats.obsolete);
-  else if (!quiet)
-    fputs (_(" done.\n"), stderr);
-
-  /* Return results.  */
-  *defp = def;
-  return result;
-}
author	Bruno Haible <bruno@clisp.org>
	Sun, 12 Oct 2025 00:12:13 +0000 (02:12 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 12 Oct 2025 00:12:49 +0000 (02:12 +0200)
gettext-tools/po/POTFILES.in		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/msgl-merge.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/msgl-merge.h	[new file with mode: 0644]	patch \| blob
gettext-tools/src/msgmerge.c		patch \| blob \| blame \| history