From: Bruno Haible Date: Sun, 28 Jul 2024 19:32:56 +0000 (+0200) Subject: libgettextpo: Remove static variables po_lex_charset, po_lex_iconv, po_lex_isolate_... X-Git-Tag: v0.23~206 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0713ab45b379404f04541cb9c81aceb8328fd03a;p=thirdparty%2Fgettext.git libgettextpo: Remove static variables po_lex_charset, po_lex_iconv, po_lex_isolate_*, po_lex_weird_cjk. * gettext-tools/src/read-catalog-abstract.h (ABSTRACT_CATALOG_READER_TY): Add fields po_lex_isolate_start, po_lex_isolate_end. * gettext-tools/src/read-catalog-abstract.c (catalog_reader_alloc): Initialize the new abstract_catalog_reader_ty fields. (parse_comment_filepos): Update. * gettext-tools/src/po-gram.h: Include <iconv.h>. (struct po_parser_state): Add fields po_lex_charset, po_lex_iconv, po_lex_weird_cjk. * gettext-tools/src/po-lex.h (lex_end): Add a 'struct po_parser_state *' parameter. (po_lex_charset_set): New declaration, moved here from po-charset.h. * gettext-tools/src/po-lex.c: Include xmalloca.h, basename-lgpl.h, progname.h, c-strstr.h. (po_lex_charset_init, po_lex_charset_set, po_lex_charset_close): Moved here from po-charset.c. Take a 'struct po_parser_state *' as argument. (mb_width, mbfile_getc, lex_start): Update. (lex_end): Add a 'struct po_parser_state *' parameter. * gettext-tools/src/po-charset.h (po_lex_charset, po_lex_isolate_start, po_lex_isolate_end, po_lex_iconv, po_lex_weird_cjk): Remove declarations. (po_lex_charset_init, po_lex_charset_set, po_lex_charset_close): Remove declarations. * gettext-tools/src/po-charset.c: Don't include <stdlib.h>, xmalloca.h, xvasprintf.h, po-xerror.h, basename-lgpl.h, progname.h, c-strstr.h. (po_lex_charset, po_lex_isolate_start, po_lex_isolate_end, po_lex_iconv, po_lex_weird_cjk): Remove variables. (po_lex_charset_init, po_lex_charset_set, po_lex_charset_close): Moved to po-lex.c. * gettext-tools/src/po-gram-gen.y: Update. * gettext-tools/src/read-po.c (po_parse): Update. 
* gettext-tools/woe32dll/gettextsrc-exports.c (po_lex_charset, po_lex_isolate_start, po_lex_isolate_end, po_lex_iconv, po_lex_weird_cjk): Remove exports. --- diff --git a/gettext-tools/src/po-charset.c b/gettext-tools/src/po-charset.c index 0e1d92018..58aa680bc 100644 --- a/gettext-tools/src/po-charset.c +++ b/gettext-tools/src/po-charset.c @@ -24,17 +24,8 @@ /* Specification. */ #include "po-charset.h" -#include #include -#include "xmalloca.h" -#include "xvasprintf.h" -#include "po-xerror.h" -#if !IN_LIBGETTEXTPO -# include "basename-lgpl.h" -# include "progname.h" -#endif -#include "c-strstr.h" #include "c-strcase.h" #include "gettext.h" @@ -431,278 +422,3 @@ po_charset_character_iterator (const char *canon_charset) return johab_character_iterator; return char_iterator; } - - -/* The PO file's encoding, as specified in the header entry. */ -const char *po_lex_charset; - -/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's - encoding, or NULL if not available. */ -const char *po_lex_isolate_start; -/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's - encoding, or NULL if not available. */ -const char *po_lex_isolate_end; - -#if HAVE_ICONV -/* Converter from the PO file's encoding to UTF-8. */ -iconv_t po_lex_iconv; -#endif -/* If no converter is available, some information about the structure of the - PO file's encoding. */ -bool po_lex_weird_cjk; - -void -po_lex_charset_init () -{ - po_lex_charset = NULL; - po_lex_isolate_start = NULL; - po_lex_isolate_end = NULL; -#if HAVE_ICONV - po_lex_iconv = (iconv_t)(-1); -#endif - po_lex_weird_cjk = false; -} - -void -po_lex_charset_set (const char *header_entry, - const char *filename, bool is_pot_role) -{ - /* Verify the validity of CHARSET. It is necessary - 1. for the correct treatment of multibyte characters containing - 0x5C bytes in the PO lexer, - 2. so that at run time, gettext() can call iconv() to convert - msgstr. 
*/ - const char *charsetstr = c_strstr (header_entry, "charset="); - - if (charsetstr != NULL) - { - size_t len; - char *charset; - const char *canon_charset; - - charsetstr += strlen ("charset="); - len = strcspn (charsetstr, " \t\n"); - charset = (char *) xmalloca (len + 1); - memcpy (charset, charsetstr, len); - charset[len] = '\0'; - - canon_charset = po_charset_canonicalize (charset); - if (canon_charset == NULL) - { - /* Don't warn for POT files, because POT files usually contain - only ASCII msgids. */ - size_t filenamelen = strlen (filename); - - if (!(strcmp (charset, "CHARSET") == 0 - && ((filenamelen >= 4 - && memcmp (filename + filenamelen - 4, ".pot", 4) == 0) - || is_pot_role))) - { - char *warning_message = - xasprintf (_("\ -Charset \"%s\" is not a portable encoding name.\n\ -Message conversion to user's charset might not work.\n"), - charset); - po_xerror (PO_SEVERITY_WARNING, NULL, - filename, (size_t)(-1), (size_t)(-1), true, - warning_message); - free (warning_message); - } - } - else - { - const char *envval; - - po_lex_charset = canon_charset; - - if (strcmp (canon_charset, "UTF-8") == 0) - { - po_lex_isolate_start = "\xE2\x81\xA8"; - po_lex_isolate_end = "\xE2\x81\xA9"; - } - else if (strcmp (canon_charset, "GB18030") == 0) - { - po_lex_isolate_start = "\x81\x36\xAC\x34"; - po_lex_isolate_end = "\x81\x36\xAC\x35"; - } - else - { - /* The other encodings don't contain U+2068, U+2069. */ - po_lex_isolate_start = NULL; - po_lex_isolate_end = NULL; - } - -#if HAVE_ICONV - if (po_lex_iconv != (iconv_t)(-1)) - iconv_close (po_lex_iconv); -#endif - - /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 - don't know about multibyte encodings, and require a spurious - backslash after every multibyte character whose last byte is - 0x5C. Some programs, like vim, distribute PO files in this - broken format. GNU msgfmt must continue to support this old - PO file format when the Makefile requests it. 
*/ - envval = getenv ("OLD_PO_FILE_INPUT"); - if (envval != NULL && *envval != '\0') - { - /* Assume the PO file is in old format, with extraneous - backslashes. */ -#if HAVE_ICONV - po_lex_iconv = (iconv_t)(-1); -#endif - po_lex_weird_cjk = false; - } - else - { - /* Use iconv() to parse multibyte characters. */ -#if HAVE_ICONV - /* Avoid glibc-2.1 bug with EUC-KR. */ -# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ - && !defined _LIBICONV_VERSION - if (strcmp (po_lex_charset, "EUC-KR") == 0) - po_lex_iconv = (iconv_t)(-1); - else -# endif - /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, - GBK, GB18030. */ -# if defined __sun && !defined _LIBICONV_VERSION - if ( strcmp (po_lex_charset, "GB2312") == 0 - || strcmp (po_lex_charset, "EUC-TW") == 0 - || strcmp (po_lex_charset, "BIG5") == 0 - || strcmp (po_lex_charset, "BIG5-HKSCS") == 0 - || strcmp (po_lex_charset, "GBK") == 0 - || strcmp (po_lex_charset, "GB18030") == 0) - po_lex_iconv = (iconv_t)(-1); - else -# endif - po_lex_iconv = iconv_open ("UTF-8", po_lex_charset); - if (po_lex_iconv == (iconv_t)(-1)) - { - const char *progname; - char *warning_message; - const char *recommendation; - const char *note; - char *whole_message; - -# if IN_LIBGETTEXTPO - progname = "libgettextpo"; -# else - progname = last_component (program_name); -# endif - - warning_message = - xasprintf (_("\ -Charset \"%s\" is not supported. %s relies on iconv(),\n\ -and iconv() does not support \"%s\".\n"), - po_lex_charset, progname, po_lex_charset); - -# if !defined _LIBICONV_VERSION || (_LIBICONV_VERSION == 0x10b && defined __APPLE__) - recommendation = _("\ -Installing GNU libiconv and then reinstalling GNU gettext\n\ -would fix this problem.\n"); -# else - recommendation = ""; -# endif - - /* Test for a charset which has double-byte characters - ending in 0x5C. For these encodings, the string parser - is likely to be confused if it can't see the character - boundaries. 
*/ - po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset); - if (po_is_charset_weird (po_lex_charset) - && !po_lex_weird_cjk) - note = _("Continuing anyway, expect parse errors."); - else - note = _("Continuing anyway."); - - whole_message = - xasprintf ("%s%s%s\n", - warning_message, recommendation, note); - - po_xerror (PO_SEVERITY_WARNING, NULL, - filename, (size_t)(-1), (size_t)(-1), true, - whole_message); - - free (whole_message); - free (warning_message); - } -#else - /* Test for a charset which has double-byte characters - ending in 0x5C. For these encodings, the string parser - is likely to be confused if it can't see the character - boundaries. */ - po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset); - if (po_is_charset_weird (po_lex_charset) && !po_lex_weird_cjk) - { - const char *progname; - char *warning_message; - const char *recommendation; - const char *note; - char *whole_message; - -# if IN_LIBGETTEXTPO - progname = "libgettextpo"; -# else - progname = last_component (program_name); -# endif - - warning_message = - xasprintf (_("\ -Charset \"%s\" is not supported. %s relies on iconv().\n\ -This version was built without iconv().\n"), - po_lex_charset, progname); - - recommendation = _("\ -Installing GNU libiconv and then reinstalling GNU gettext\n\ -would fix this problem.\n"); - - note = _("Continuing anyway, expect parse errors."); - - whole_message = - xasprintf ("%s%s%s\n", - warning_message, recommendation, note); - - po_xerror (PO_SEVERITY_WARNING, NULL, - filename, (size_t)(-1), (size_t)(-1), true, - whole_message); - - free (whole_message); - free (warning_message); - } -#endif - } - } - freea (charset); - } - else - { - /* Don't warn for POT files, because POT files usually contain - only ASCII msgids. 
*/ - size_t filenamelen = strlen (filename); - - if (!(filenamelen >= 4 - && memcmp (filename + filenamelen - 4, ".pot", 4) == 0)) - po_xerror (PO_SEVERITY_WARNING, - NULL, filename, (size_t)(-1), (size_t)(-1), true, - _("\ -Charset missing in header.\n\ -Message conversion to user's charset will not work.\n")); - } -} - -void -po_lex_charset_close () -{ - po_lex_charset = NULL; - po_lex_isolate_start = NULL; - po_lex_isolate_end = NULL; -#if HAVE_ICONV - if (po_lex_iconv != (iconv_t)(-1)) - { - iconv_close (po_lex_iconv); - po_lex_iconv = (iconv_t)(-1); - } -#endif - po_lex_weird_cjk = false; -} diff --git a/gettext-tools/src/po-charset.h b/gettext-tools/src/po-charset.h index ed0db85a8..9b7506bd9 100644 --- a/gettext-tools/src/po-charset.h +++ b/gettext-tools/src/po-charset.h @@ -1,5 +1,5 @@ /* Charset handling while reading PO files. - Copyright (C) 2001-2003, 2006, 2021, 2023 Free Software Foundation, Inc. + Copyright (C) 2001-2024 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify @@ -64,37 +64,6 @@ typedef size_t (*character_iterator_t) (const char *s); extern character_iterator_t po_charset_character_iterator (const char *canon_charset); -/* The PO file's encoding, as specified in the header entry. */ -extern DLL_VARIABLE const char *po_lex_charset; - -/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's - encoding, or NULL if not available. */ -extern DLL_VARIABLE const char *po_lex_isolate_start; -/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's - encoding, or NULL if not available. */ -extern DLL_VARIABLE const char *po_lex_isolate_end; - -#if HAVE_ICONV -/* Converter from the PO file's encoding to UTF-8. */ -extern DLL_VARIABLE iconv_t po_lex_iconv; -#endif -/* If no converter is available, some information about the structure of the - PO file's encoding. 
*/ -extern DLL_VARIABLE bool po_lex_weird_cjk; - -/* Initialize the PO file's encoding. */ -extern void po_lex_charset_init (void); - -/* Set the PO file's encoding from the header entry. - If is_pot_role is true, "charset=CHARSET" is expected and does not deserve - a warning. */ -extern void po_lex_charset_set (const char *header_entry, - const char *filename, bool is_pot_role); - -/* Finish up with the PO file's encoding. */ -extern void po_lex_charset_close (void); - - #ifdef __cplusplus } #endif diff --git a/gettext-tools/src/po-gram-gen.y b/gettext-tools/src/po-gram-gen.y index 9d25960fe..2c9e9368e 100644 --- a/gettext-tools/src/po-gram-gen.y +++ b/gettext-tools/src/po-gram-gen.y @@ -54,7 +54,7 @@ do_callback_message (struct po_parser_state *ps, { /* Test for header entry. Ignore fuzziness of the header entry. */ if (msgctxt == NULL && msgid[0] == '\0' && !obsolete) - po_lex_charset_set (msgstr, gram_pos.file_name, ps->gram_pot_role); + po_lex_charset_set (ps, msgstr, gram_pos.file_name, ps->gram_pot_role); catalog_reader_seen_message (ps->catr, msgctxt, diff --git a/gettext-tools/src/po-gram.h b/gettext-tools/src/po-gram.h index 04271ae01..02469e13e 100644 --- a/gettext-tools/src/po-gram.h +++ b/gettext-tools/src/po-gram.h @@ -23,6 +23,7 @@ #include #if HAVE_ICONV +#include # include "unistr.h" #endif @@ -89,6 +90,17 @@ struct po_parser_state /* ----- Local variables of po-lex.c ----- */ + /* The PO file's encoding, as specified in the header entry. */ + const char *po_lex_charset; + +#if HAVE_ICONV + /* Converter from the PO file's encoding to UTF-8. */ + iconv_t po_lex_iconv; +#endif + /* If no converter is available, some information about the structure of the + PO file's encoding. */ + bool po_lex_weird_cjk; + /* Current position within the PO file. 
*/ int gram_pos_column; diff --git a/gettext-tools/src/po-lex.c b/gettext-tools/src/po-lex.c index 67d1dab09..cd5921080 100644 --- a/gettext-tools/src/po-lex.c +++ b/gettext-tools/src/po-lex.c @@ -47,6 +47,12 @@ #include "xvasprintf.h" #include "po-error.h" #include "po-xerror.h" +#include "xmalloca.h" +#if !IN_LIBGETTEXTPO +# include "basename-lgpl.h" +# include "progname.h" +#endif +#include "c-strstr.h" #include "pos.h" #include "message.h" #include "str-list.h" @@ -107,6 +113,271 @@ po_gram_error_at_line (const lex_pos_ty *pp, const char *fmt, ...) } +/* Charset handling while parsing PO files. */ + +/* Initialize the PO file's encoding. */ +static void +po_lex_charset_init (struct po_parser_state *ps) +{ + ps->po_lex_charset = NULL; + ps->catr->po_lex_isolate_start = NULL; + ps->catr->po_lex_isolate_end = NULL; +#if HAVE_ICONV + ps->po_lex_iconv = (iconv_t)(-1); +#endif + ps->po_lex_weird_cjk = false; +} + +/* Set the PO file's encoding from the header entry. + If is_pot_role is true, "charset=CHARSET" is expected and does not deserve + a warning. */ +void +po_lex_charset_set (struct po_parser_state *ps, + const char *header_entry, + const char *filename, bool is_pot_role) +{ + /* Verify the validity of CHARSET. It is necessary + 1. for the correct treatment of multibyte characters containing + 0x5C bytes in the PO lexer, + 2. so that at run time, gettext() can call iconv() to convert + msgstr. */ + const char *charsetstr = c_strstr (header_entry, "charset="); + + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *canon_charset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) xmalloca (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + canon_charset = po_charset_canonicalize (charset); + if (canon_charset == NULL) + { + /* Don't warn for POT files, because POT files usually contain + only ASCII msgids. 
*/ + size_t filenamelen = strlen (filename); + + if (!(strcmp (charset, "CHARSET") == 0 + && ((filenamelen >= 4 + && memcmp (filename + filenamelen - 4, ".pot", 4) == 0) + || is_pot_role))) + { + char *warning_message = + xasprintf (_("\ +Charset \"%s\" is not a portable encoding name.\n\ +Message conversion to user's charset might not work.\n"), + charset); + po_xerror (PO_SEVERITY_WARNING, NULL, + filename, (size_t)(-1), (size_t)(-1), true, + warning_message); + free (warning_message); + } + } + else + { + const char *envval; + + ps->po_lex_charset = canon_charset; + + if (strcmp (canon_charset, "UTF-8") == 0) + { + ps->catr->po_lex_isolate_start = "\xE2\x81\xA8"; + ps->catr->po_lex_isolate_end = "\xE2\x81\xA9"; + } + else if (strcmp (canon_charset, "GB18030") == 0) + { + ps->catr->po_lex_isolate_start = "\x81\x36\xAC\x34"; + ps->catr->po_lex_isolate_end = "\x81\x36\xAC\x35"; + } + else + { + /* The other encodings don't contain U+2068, U+2069. */ + ps->catr->po_lex_isolate_start = NULL; + ps->catr->po_lex_isolate_end = NULL; + } + +#if HAVE_ICONV + if (ps->po_lex_iconv != (iconv_t)(-1)) + iconv_close (ps->po_lex_iconv); +#endif + + /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 + don't know about multibyte encodings, and require a spurious + backslash after every multibyte character whose last byte is + 0x5C. Some programs, like vim, distribute PO files in this + broken format. GNU msgfmt must continue to support this old + PO file format when the Makefile requests it. */ + envval = getenv ("OLD_PO_FILE_INPUT"); + if (envval != NULL && *envval != '\0') + { + /* Assume the PO file is in old format, with extraneous + backslashes. */ +#if HAVE_ICONV + ps->po_lex_iconv = (iconv_t)(-1); +#endif + ps->po_lex_weird_cjk = false; + } + else + { + /* Use iconv() to parse multibyte characters. */ +#if HAVE_ICONV + /* Avoid glibc-2.1 bug with EUC-KR. 
*/ +# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + && !defined _LIBICONV_VERSION + if (strcmp (ps->po_lex_charset, "EUC-KR") == 0) + ps->po_lex_iconv = (iconv_t)(-1); + else +# endif + /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, + GBK, GB18030. */ +# if defined __sun && !defined _LIBICONV_VERSION + if ( strcmp (ps->po_lex_charset, "GB2312") == 0 + || strcmp (ps->po_lex_charset, "EUC-TW") == 0 + || strcmp (ps->po_lex_charset, "BIG5") == 0 + || strcmp (ps->po_lex_charset, "BIG5-HKSCS") == 0 + || strcmp (ps->po_lex_charset, "GBK") == 0 + || strcmp (ps->po_lex_charset, "GB18030") == 0) + ps->po_lex_iconv = (iconv_t)(-1); + else +# endif + ps->po_lex_iconv = iconv_open ("UTF-8", ps->po_lex_charset); + if (ps->po_lex_iconv == (iconv_t)(-1)) + { + const char *progname; + char *warning_message; + const char *recommendation; + const char *note; + char *whole_message; + +# if IN_LIBGETTEXTPO + progname = "libgettextpo"; +# else + progname = last_component (program_name); +# endif + + warning_message = + xasprintf (_("\ +Charset \"%s\" is not supported. %s relies on iconv(),\n\ +and iconv() does not support \"%s\".\n"), + ps->po_lex_charset, progname, ps->po_lex_charset); + +# if !defined _LIBICONV_VERSION || (_LIBICONV_VERSION == 0x10b && defined __APPLE__) + recommendation = _("\ +Installing GNU libiconv and then reinstalling GNU gettext\n\ +would fix this problem.\n"); +# else + recommendation = ""; +# endif + + /* Test for a charset which has double-byte characters + ending in 0x5C. For these encodings, the string parser + is likely to be confused if it can't see the character + boundaries. 
*/ + ps->po_lex_weird_cjk = po_is_charset_weird_cjk (ps->po_lex_charset); + if (po_is_charset_weird (ps->po_lex_charset) + && !ps->po_lex_weird_cjk) + note = _("Continuing anyway, expect parse errors."); + else + note = _("Continuing anyway."); + + whole_message = + xasprintf ("%s%s%s\n", + warning_message, recommendation, note); + + po_xerror (PO_SEVERITY_WARNING, NULL, + filename, (size_t)(-1), (size_t)(-1), true, + whole_message); + + free (whole_message); + free (warning_message); + } +#else + /* Test for a charset which has double-byte characters + ending in 0x5C. For these encodings, the string parser + is likely to be confused if it can't see the character + boundaries. */ + ps->po_lex_weird_cjk = po_is_charset_weird_cjk (ps->po_lex_charset); + if (po_is_charset_weird (ps->po_lex_charset) && !ps->po_lex_weird_cjk) + { + const char *progname; + char *warning_message; + const char *recommendation; + const char *note; + char *whole_message; + +# if IN_LIBGETTEXTPO + progname = "libgettextpo"; +# else + progname = last_component (program_name); +# endif + + warning_message = + xasprintf (_("\ +Charset \"%s\" is not supported. %s relies on iconv().\n\ +This version was built without iconv().\n"), + ps->po_lex_charset, progname); + + recommendation = _("\ +Installing GNU libiconv and then reinstalling GNU gettext\n\ +would fix this problem.\n"); + + note = _("Continuing anyway, expect parse errors."); + + whole_message = + xasprintf ("%s%s%s\n", + warning_message, recommendation, note); + + po_xerror (PO_SEVERITY_WARNING, NULL, + filename, (size_t)(-1), (size_t)(-1), true, + whole_message); + + free (whole_message); + free (warning_message); + } +#endif + } + } + freea (charset); + } + else + { + /* Don't warn for POT files, because POT files usually contain + only ASCII msgids. 
*/ + size_t filenamelen = strlen (filename); + + if (!(filenamelen >= 4 + && memcmp (filename + filenamelen - 4, ".pot", 4) == 0)) + po_xerror (PO_SEVERITY_WARNING, + NULL, filename, (size_t)(-1), (size_t)(-1), true, + _("\ +Charset missing in header.\n\ +Message conversion to user's charset will not work.\n")); + } +} + +/* Finish up with the PO file's encoding. */ +static void +po_lex_charset_close (struct po_parser_state *ps) +{ + ps->po_lex_charset = NULL; + ps->catr->po_lex_isolate_start = NULL; + ps->catr->po_lex_isolate_end = NULL; +#if HAVE_ICONV + if (ps->po_lex_iconv != (iconv_t)(-1)) + { + iconv_close (ps->po_lex_iconv); + ps->po_lex_iconv = (iconv_t)(-1); + } +#endif + ps->po_lex_weird_cjk = false; +} + + /* The lowest level of PO file parsing converts bytes to multibyte characters. This is needed 1. for C compatibility: ISO C 99 section 5.1.1.2 says that the first @@ -241,7 +512,7 @@ mb_width (struct po_parser_state *ps, const mbchar_t mbc) { ucs4_t uc = mbc->uc; const char *encoding = - (po_lex_iconv != (iconv_t)(-1) ? po_lex_charset : ""); + (ps->po_lex_iconv != (iconv_t)(-1) ? ps->po_lex_charset : ""); int w = uc_width (uc, encoding); /* For unprintable characters, arbitrarily return 0 for control characters (except tab) and MB_UNPRINTABLE_WIDTH otherwise. */ @@ -356,7 +627,7 @@ mbfile_getc (struct po_parser_state *ps, mbchar_t mbc, mbfile_t mbf) } #if HAVE_ICONV - if (po_lex_iconv != (iconv_t)(-1)) + if (ps->po_lex_iconv != (iconv_t)(-1)) { /* Use iconv on an increasing number of bytes. Read only as many bytes from mbf->fp as needed. 
This is needed to give reasonable @@ -370,7 +641,7 @@ mbfile_getc (struct po_parser_state *ps, mbchar_t mbc, mbfile_t mbf) char *outptr = (char *) &scratchbuf[0]; size_t outsize = sizeof (scratchbuf); - size_t res = iconv (po_lex_iconv, + size_t res = iconv (ps->po_lex_iconv, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize); /* We expect that a character has been produced if and only if @@ -468,7 +739,7 @@ mbfile_getc (struct po_parser_state *ps, mbchar_t mbc, mbfile_t mbf) else #endif { - if (po_lex_weird_cjk + if (ps->po_lex_weird_cjk /* Special handling of encodings with CJK structure. */ && (unsigned char) mbf->buf[0] >= 0x80) { @@ -569,16 +840,16 @@ lex_start (struct po_parser_state *ps, ps->signal_eilseq = true; ps->po_lex_obsolete = false; ps->po_lex_previous = false; - po_lex_charset_init (); + po_lex_charset_init (ps); } /* Terminate lexical analysis. */ void -lex_end () +lex_end (struct po_parser_state *ps) { gram_pos.file_name = NULL; gram_pos.line_number = 0; - po_lex_charset_close (); + po_lex_charset_close (ps); } diff --git a/gettext-tools/src/po-lex.h b/gettext-tools/src/po-lex.h index fdbeef0f5..6027d1732 100644 --- a/gettext-tools/src/po-lex.h +++ b/gettext-tools/src/po-lex.h @@ -71,7 +71,7 @@ extern void lex_start (struct po_parser_state *ps, const char *logical_filename); /* Terminate lexical analysis. */ -extern void lex_end (void); +extern void lex_end (struct po_parser_state *ps); /* Return the next token in the PO file. The return codes are defined in "po-gram-gen.h". Associated data is put in '*lval'. */ @@ -90,6 +90,13 @@ extern void po_gram_error (struct po_parser_state *ps, const char *fmt, ...) extern void po_gram_error_at_line (const lex_pos_ty *pos, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3))); +/* Set the PO file's encoding from the header entry. + If is_pot_role is true, "charset=CHARSET" is expected and does not deserve + a warning. 
*/ +extern void po_lex_charset_set (struct po_parser_state *ps, + const char *header_entry, + const char *filename, bool is_pot_role); + /* Contains information about the definition of one translation. */ struct msgstr_def diff --git a/gettext-tools/src/read-catalog-abstract.c b/gettext-tools/src/read-catalog-abstract.c index ebd796275..9edf07893 100644 --- a/gettext-tools/src/read-catalog-abstract.c +++ b/gettext-tools/src/read-catalog-abstract.c @@ -47,6 +47,8 @@ catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table) catr = (abstract_catalog_reader_ty *) xmalloc (method_table->size); catr->methods = method_table; + catr->po_lex_isolate_start = NULL; + catr->po_lex_isolate_end = NULL; if (method_table->constructor) method_table->constructor (catr); return catr; @@ -250,11 +252,11 @@ parse_comment_filepos (abstract_catalog_reader_ty *catr, const char *s) if (*s != '\0') { bool isolated_filename = - (po_lex_isolate_start != NULL - && strncmp (s, po_lex_isolate_start, - strlen (po_lex_isolate_start)) == 0); + (catr->po_lex_isolate_start != NULL + && strncmp (s, catr->po_lex_isolate_start, + strlen (catr->po_lex_isolate_start)) == 0); if (isolated_filename) - s += strlen (po_lex_isolate_start); + s += strlen (catr->po_lex_isolate_start); const char *filename_start = s; const char *filename_end; @@ -268,11 +270,11 @@ parse_comment_filepos (abstract_catalog_reader_ty *catr, const char *s) filename_end = s; break; } - if (strncmp (s, po_lex_isolate_end, - strlen (po_lex_isolate_end)) == 0) + if (strncmp (s, catr->po_lex_isolate_end, + strlen (catr->po_lex_isolate_end)) == 0) { filename_end = s; - s += strlen (po_lex_isolate_end); + s += strlen (catr->po_lex_isolate_end); break; } } diff --git a/gettext-tools/src/read-catalog-abstract.h b/gettext-tools/src/read-catalog-abstract.h index b9147917c..7b1173e33 100644 --- a/gettext-tools/src/read-catalog-abstract.h +++ b/gettext-tools/src/read-catalog-abstract.h @@ -124,7 +124,14 @@ struct 
abstract_catalog_reader_class_ty etc. */ #define ABSTRACT_CATALOG_READER_TY \ - abstract_catalog_reader_class_ty *methods; + abstract_catalog_reader_class_ty *methods; \ + \ + /* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the \ + PO file's encoding, or NULL if not available. */ \ + const char *po_lex_isolate_start; \ + /* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the \ + PO file's encoding, or NULL if not available. */ \ + const char *po_lex_isolate_end; \ typedef struct abstract_catalog_reader_ty abstract_catalog_reader_ty; struct abstract_catalog_reader_ty diff --git a/gettext-tools/src/read-po.c b/gettext-tools/src/read-po.c index 77ba9dec4..7b28953e0 100644 --- a/gettext-tools/src/read-po.c +++ b/gettext-tools/src/read-po.c @@ -37,7 +37,7 @@ po_parse (abstract_catalog_reader_ty *catr, FILE *fp, ps.gram_pot_role = is_pot_role; lex_start (&ps, fp, real_filename, logical_filename); po_gram_parse (&ps); - lex_end (); + lex_end (&ps); } const struct catalog_input_format input_format_po = diff --git a/gettext-tools/woe32dll/gettextsrc-exports.c b/gettext-tools/woe32dll/gettextsrc-exports.c index 5d74025f8..4bf249a31 100644 --- a/gettext-tools/woe32dll/gettextsrc-exports.c +++ b/gettext-tools/woe32dll/gettextsrc-exports.c @@ -1,5 +1,5 @@ /* List of exported symbols of libgettextsrc on Cygwin and native Windows. - Copyright (C) 2006-2007, 2009-2011, 2013-2015, 2019, 2021, 2023 Free Software Foundation, Inc. + Copyright (C) 2006-2024 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify @@ -75,13 +75,6 @@ VARIABLE(po_charset_ascii) VARIABLE(po_charset_utf8) VARIABLE(po_error) VARIABLE(po_error_at_line) -VARIABLE(po_lex_charset) -VARIABLE(po_lex_isolate_start) -VARIABLE(po_lex_isolate_end) -#if HAVE_ICONV -VARIABLE(po_lex_iconv) -#endif -VARIABLE(po_lex_weird_cjk) VARIABLE(po_multiline_error) VARIABLE(po_multiline_warning) VARIABLE(po_xerror)