git.ipfire.org Git - thirdparty/gettext.git/commitdiff
Fix behaviour of msgmerge when the PO file and the compendium are in different encodings.
author Bruno Haible <bruno@clisp.org>
Fri, 4 Aug 2006 14:17:06 +0000 (14:17 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:13:53 +0000 (12:13 +0200)

gettext-tools/src/ChangeLog
gettext-tools/src/msgmerge.c
gettext-tools/tests/ChangeLog
gettext-tools/tests/Makefile.am

index bfa4507040130698dffcf36dc003bf84d0535bb4..ac435a5b007114992b0f273ecb3896d956dd8af1 100644 (file)
@@ -1,3 +1,16 @@
+2006-08-01  Bruno Haible  <bruno@clisp.org>
+
+       Fix behaviour of msgmerge when the PO file and the compendium are
+       in different encodings.
+       * msgl-iconv.h (is_message_list_iconvable): New declaration.
+       * msgl-iconv.c (iconvable_string, iconvable_string_list,
+       iconvable_msgid, iconvable_msgstr, is_message_list_iconvable): New
+       functions.
+       * msgmerge.c: Include xallocsa.h.
+       (merge): In the case that the .pot file is ASCII, not UTF-8, convert
+       the definitions and the compendia contents to a common encoding.
+       Reported by Stanislav Brabec <sbrabec@suse.cz>.
+
 2006-07-30  Bruno Haible  <bruno@clisp.org>
 
        * Makefile.am (AM_CPPFLAGS, LDADD, libgettextsrc_la_LDFLAGS,
index 60f68601013202191a5670f938191362f10ad262..d183290550b3123fd39697b16ab1c54c36e79b76 100644 (file)
@@ -41,6 +41,7 @@
 #include "write-po.h"
 #include "format.h"
 #include "xalloc.h"
+#include "xallocsa.h"
 #include "obstack.h"
 #include "c-strstr.h"
 #include "exit.h"
@@ -1369,9 +1370,152 @@ merge (const char *fn1, const char *fn2, msgdomain_list_ty **defp)
            iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
                                compendium_filenames->item[k]);
       }
-    else
+    else if (compendiums != NULL && compendiums->nitems > 0)
       {
-       /* TODO: Convert all compendiums->item[k] to the same encoding.  */
+       /* Ensure that the definitions and the compendiums are in the same
+          encoding.  Prefer the encoding of the definitions file, if
+          possible; otherwise, if the definitions file is empty and the
+          compendiums are all in the same encoding, use that encoding;
+          otherwise, use UTF-8.  */
+       bool conversion_done = false;
+       {
+         char *charset = NULL;
+
+         /* Get the encoding of the definitions file.  */
+         for (k = 0; k < def->nitems; k++)
+           {
+             message_list_ty *mlp = def->item[k]->messages;
+
+             for (j = 0; j < mlp->nitems; j++)
+               if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+                 {
+                   const char *header = mlp->item[j]->msgstr;
+
+                   if (header != NULL)
+                     {
+                       const char *charsetstr = c_strstr (header, "charset=");
+
+                       if (charsetstr != NULL)
+                         {
+                           size_t len;
+
+                           charsetstr += strlen ("charset=");
+                           len = strcspn (charsetstr, " \t\n");
+                           charset = (char *) xallocsa (len + 1);
+                           memcpy (charset, charsetstr, len);
+                           charset[len] = '\0';
+                           break;
+                         }
+                     }
+                 }
+             if (charset != NULL)
+               break;
+           }
+         if (charset != NULL)
+           {
+             const char *canon_charset = po_charset_canonicalize (charset);
+
+             if (canon_charset != NULL)
+               {
+                 bool all_compendiums_iconvable = true;
+
+                 if (compendiums != NULL)
+                   for (k = 0; k < compendiums->nitems; k++)
+                     if (!is_message_list_iconvable (compendiums->item[k],
+                                                     NULL, canon_charset))
+                       {
+                         all_compendiums_iconvable = false;
+                         break;
+                       }
+
+                 if (all_compendiums_iconvable)
+                   {
+                     /* Convert the compendiums to def's encoding.  */
+                     if (compendiums != NULL)
+                       for (k = 0; k < compendiums->nitems; k++)
+                         iconv_message_list (compendiums->item[k],
+                                             NULL, canon_charset,
+                                             compendium_filenames->item[k]);
+                     conversion_done = true;
+                   }
+               }
+             freesa (charset);
+           }
+       }
+       if (!conversion_done)
+         {
+           if (def->nitems == 0
+               || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
+             {
+               /* The definitions file is empty.
+                  Compare the encodings of the compendiums.  */
+               const char *common_canon_charset = NULL;
+
+               for (k = 0; k < compendiums->nitems; k++)
+                 {
+                   message_list_ty *mlp = compendiums->item[k];
+                   char *charset = NULL;
+                   const char *canon_charset = NULL;
+
+                   for (j = 0; j < mlp->nitems; j++)
+                     if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+                       {
+                         const char *header = mlp->item[j]->msgstr;
+
+                         if (header != NULL)
+                           {
+                             const char *charsetstr =
+                               c_strstr (header, "charset=");
+
+                             if (charsetstr != NULL)
+                               {
+                                 size_t len;
+
+                                 charsetstr += strlen ("charset=");
+                                 len = strcspn (charsetstr, " \t\n");
+                                 charset = (char *) xallocsa (len + 1);
+                                 memcpy (charset, charsetstr, len);
+                                 charset[len] = '\0';
+
+                                 break;
+                               }
+                           }
+                       }
+                   if (charset != NULL)
+                     {
+                       canon_charset = po_charset_canonicalize (charset);
+                       freesa (charset);
+                     }
+                   /* If no charset declaration was found in this file,
+                      or if it is not a valid encoding name, or if it
+                      differs from the common charset found so far,
+                      we have no common charset.  */
+                   if (canon_charset == NULL
+                       || (common_canon_charset != NULL
+                           && canon_charset != common_canon_charset))
+                     {
+                       common_canon_charset = NULL;
+                       break;
+                     }
+                   common_canon_charset = canon_charset;
+                 }
+
+               if (common_canon_charset != NULL)
+                 /* No conversion needed in this case.  */
+                 conversion_done = true;
+             }
+           if (!conversion_done)
+             {
+               /* It's too hairy to find out what would be the optimal target
+                  encoding.  So, convert everything to UTF-8.  */
+               def = iconv_msgdomain_list (def, "UTF-8", fn1);
+               if (compendiums != NULL)
+                 for (k = 0; k < compendiums->nitems; k++)
+                   iconv_message_list (compendiums->item[k],
+                                       NULL, po_charset_utf8,
+                                       compendium_filenames->item[k]);
+             }
+         }
       }
   }
 
index bb6c8c1c2db87281fa57c619ad73ddd793e04b69..278037197a99859e025fa2c51aa345a327e267ae 100644 (file)
@@ -1,3 +1,13 @@
+2006-08-01  Bruno Haible  <bruno@clisp.org>
+
+       * msgmerge-compendium-6: New file.
+       * mm-ko.ascii.pot: New file.
+       * mm-ko.euc-kr.po: New file.
+       * mm-ko-comp.euc-kr.po: New file.
+       * Makefile.am (TESTS): Add msgmerge-compendium-6.
+       (EXTRA_DIST): Add mm-ko.ascii.pot, mm-ko.euc-kr.po,
+       mm-ko-comp.euc-kr.po.
+
 2006-08-01  Bruno Haible  <bruno@clisp.org>
 
        * mm-viet.comp.po: Renamed from msgmerge-v.comp.po.
index e67afe423ec4bde938b54a8ccc9ddd2806f9d5a3..a1a399f85e2729a17bd5b5e2f4eb842ceb4c452e 100644 (file)
@@ -53,7 +53,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
        msgmerge-13 msgmerge-14 msgmerge-15 msgmerge-16 msgmerge-17 \
        msgmerge-18 \
        msgmerge-compendium-1 msgmerge-compendium-2 msgmerge-compendium-3 \
-       msgmerge-compendium-4 msgmerge-compendium-5 \
+       msgmerge-compendium-4 msgmerge-compendium-5 msgmerge-compendium-6 \
        msgmerge-properties-1 msgmerge-properties-2 \
        msgmerge-update-1 msgmerge-update-2 msgmerge-update-3 \
        msgunfmt-1 \
@@ -118,6 +118,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
 
 EXTRA_DIST += $(TESTS) \
        test.mo xg-c-1.ok.po mex-test2.ok \
+       mm-ko.ascii.pot mm-ko.euc-kr.po mm-ko-comp.euc-kr.po \
        mm-viet.comp.po mm-viet.pot mm-viet.out \
        msguniq-a.in msguniq-a.inp msguniq-a.out \
        qttest_pl.po qttest_pl.qm \