From 1fabf61b3e8fec3d0f6cb7edc22aaa742c3543a4 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 4 Aug 2006 14:17:06 +0000 Subject: [PATCH] Fix behaviour of msgmerge when the PO file and the compendium are in different encodings. --- gettext-tools/src/ChangeLog | 13 +++ gettext-tools/src/msgmerge.c | 148 +++++++++++++++++++++++++++++++- gettext-tools/tests/ChangeLog | 10 +++ gettext-tools/tests/Makefile.am | 3 +- 4 files changed, 171 insertions(+), 3 deletions(-) diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index bfa450704..ac435a5b0 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,16 @@ +2006-08-01 Bruno Haible + + Fix behaviour of msgmerge when the PO file and the compendium are + in different encodings. + * msgl-iconv.h (is_message_list_iconvable): New declaration. + * msgl-iconv.c (iconvable_string, iconvable_string_list, + iconvable_msgid, iconvable_msgstr, is_message_list_iconvable): New + functions. + * msgmerge.c: Include xallocsa.h. + (merge): In the case that the .pot file is ASCII, not UTF-8, convert + the definitions and the compendia contents to a common encoding. + Reported by Stanislav Brabec . + 2006-07-30 Bruno Haible * Makefile.am (AM_CPPFLAGS, LDADD, libgettextsrc_la_LDFLAGS, diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c index 60f686010..d18329055 100644 --- a/gettext-tools/src/msgmerge.c +++ b/gettext-tools/src/msgmerge.c @@ -41,6 +41,7 @@ #include "write-po.h" #include "format.h" #include "xalloc.h" +#include "xallocsa.h" #include "obstack.h" #include "c-strstr.h" #include "exit.h" @@ -1369,9 +1370,152 @@ merge (const char *fn1, const char *fn2, msgdomain_list_ty **defp) iconv_message_list (compendiums->item[k], NULL, po_charset_utf8, compendium_filenames->item[k]); } - else + else if (compendiums != NULL && compendiums->nitems > 0) { - /* TODO: Convert all compendiums->item[k] to the same encoding. */ + /* Ensure that the definitions and the compendiums are in the same + encoding. Prefer the encoding of the definitions file, if + possible; otherwise, if the definitions file is empty and the + compendiums are all in the same encoding, use that encoding; + otherwise, use UTF-8. */ + bool conversion_done = false; + { + char *charset = NULL; + + /* Get the encoding of the definitions file. */ + for (k = 0; k < def->nitems; k++) + { + message_list_ty *mlp = def->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = c_strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) xallocsa (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + break; + } + } + } + if (charset != NULL) + break; + } + if (charset != NULL) + { + const char *canon_charset = po_charset_canonicalize (charset); + + if (canon_charset != NULL) + { + bool all_compendiums_iconvable = true; + + if (compendiums != NULL) + for (k = 0; k < compendiums->nitems; k++) + if (!is_message_list_iconvable (compendiums->item[k], + NULL, canon_charset)) + { + all_compendiums_iconvable = false; + break; + } + + if (all_compendiums_iconvable) + { + /* Convert the compendiums to def's encoding. */ + if (compendiums != NULL) + for (k = 0; k < compendiums->nitems; k++) + iconv_message_list (compendiums->item[k], + NULL, canon_charset, + compendium_filenames->item[k]); + conversion_done = true; + } + } + freesa (charset); + } + } + if (!conversion_done) + { + if (def->nitems == 0 + || (def->nitems == 1 && def->item[0]->messages->nitems == 0)) + { + /* The definitions file is empty. + Compare the encodings of the compendiums. */ + const char *common_canon_charset = NULL; + + for (k = 0; k < compendiums->nitems; k++) + { + message_list_ty *mlp = compendiums->item[k]; + char *charset = NULL; + const char *canon_charset = NULL; + + for (j = 0; j < mlp->nitems; j++) + if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = + c_strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) xallocsa (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + break; + } + } + } + if (charset != NULL) + { + canon_charset = po_charset_canonicalize (charset); + freesa (charset); + } + /* If no charset declaration was found in this file, + or if it is not a valid encoding name, or if it + differs from the common charset found so far, + we have no common charset. */ + if (canon_charset == NULL + || (common_canon_charset != NULL + && canon_charset != common_canon_charset)) + { + common_canon_charset = NULL; + break; + } + common_canon_charset = canon_charset; + } + + if (common_canon_charset != NULL) + /* No conversion needed in this case. */ + conversion_done = true; + } + if (!conversion_done) + { + /* It's too hairy to find out what would be the optimal target + encoding. So, convert everything to UTF-8. */ + def = iconv_msgdomain_list (def, "UTF-8", fn1); + if (compendiums != NULL) + for (k = 0; k < compendiums->nitems; k++) + iconv_message_list (compendiums->item[k], + NULL, po_charset_utf8, + compendium_filenames->item[k]); + } + } } } diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index bb6c8c1c2..278037197 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,13 @@ +2006-08-01 Bruno Haible + + * msgmerge-compendium-6: New file. + * mm-ko.ascii.pot: New file. + * mm-ko.euc-kr.po: New file. + * mm-ko-comp.euc-kr.po: New file. + * Makefile.am (TESTS): Add msgmerge-compendium-6. + (EXTRA_DIST): Add mm-ko.ascii.pot, mm-ko.euc-kr.po, + mm-ko-comp.euc-kr.po. + 2006-08-01 Bruno Haible * mm-viet.comp.po: Renamed from msgmerge-v.comp.po. diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index e67afe423..a1a399f85 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -53,7 +53,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ msgmerge-13 msgmerge-14 msgmerge-15 msgmerge-16 msgmerge-17 \ msgmerge-18 \ msgmerge-compendium-1 msgmerge-compendium-2 msgmerge-compendium-3 \ - msgmerge-compendium-4 msgmerge-compendium-5 \ + msgmerge-compendium-4 msgmerge-compendium-5 msgmerge-compendium-6 \ msgmerge-properties-1 msgmerge-properties-2 \ msgmerge-update-1 msgmerge-update-2 msgmerge-update-3 \ msgunfmt-1 \ @@ -118,6 +118,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ EXTRA_DIST += $(TESTS) \ test.mo xg-c-1.ok.po mex-test2.ok \ + mm-ko.ascii.pot mm-ko.euc-kr.po mm-ko-comp.euc-kr.po \ mm-viet.comp.po mm-viet.pot mm-viet.out \ msguniq-a.in msguniq-a.inp msguniq-a.out \ qttest_pl.po qttest_pl.qm \ -- 2.47.3