encodings), including
single-byte character sets such as the ISO 8859 series and
multiple-byte character sets such as <acronym>EUC</acronym> (Extended Unix
- Code), UTF-8, and Mule internal code. All supported character sets
+ Code) and UTF-8. All supported character sets
can be used transparently by clients, but a few are not supported
for use within the server (that is, as a server-side encoding).
The default character set is selected while
<entry>1</entry>
<entry><literal>ISO885916</literal></entry>
</row>
- <row>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry>Mule internal code</entry>
- <entry>Multilingual Emacs</entry>
- <entry>Yes</entry>
- <entry>No</entry>
- <entry>1–4</entry>
- <entry></entry>
- </row>
<row>
<entry><literal>SJIS</literal></entry>
<entry>Shift JIS</entry>
<para>
Not all client <acronym>API</acronym>s support all the listed character sets. For example, the
<productname>PostgreSQL</productname>
- JDBC driver does not support <literal>MULE_INTERNAL</literal>, <literal>LATIN6</literal>,
+ JDBC driver does not support <literal>LATIN6</literal>,
<literal>LATIN8</literal>, and <literal>LATIN10</literal>.
</para>
<row>
<entry><literal>EUC_CN</literal></entry>
<entry><emphasis>EUC_CN</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>EUC_JP</literal></entry>
<entry><emphasis>EUC_JP</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>SJIS</literal>,
<literal>UTF8</literal>
</entry>
<row>
<entry><literal>EUC_KR</literal></entry>
<entry><emphasis>EUC_KR</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<entry><literal>EUC_TW</literal></entry>
<entry><emphasis>EUC_TW</emphasis>,
<literal>BIG5</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<entry><literal>ISO_8859_5</literal></entry>
<entry><emphasis>ISO_8859_5</emphasis>,
<literal>KOI8R</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>,
<literal>WIN1251</literal>
<entry><literal>KOI8R</literal></entry>
<entry><emphasis>KOI8R</emphasis>,
<literal>ISO_8859_5</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>,
<literal>WIN1251</literal>
<row>
<entry><literal>LATIN1</literal></entry>
<entry><emphasis>LATIN1</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>LATIN2</literal></entry>
<entry><emphasis>LATIN2</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN1250</literal>
</entry>
<row>
<entry><literal>LATIN3</literal></entry>
<entry><emphasis>LATIN3</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>LATIN4</literal></entry>
<entry><emphasis>LATIN4</emphasis>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<literal>UTF8</literal>
</entry>
</row>
- <row>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><emphasis>MULE_INTERNAL</emphasis>,
- <literal>BIG5</literal>,
- <literal>EUC_CN</literal>,
- <literal>EUC_JP</literal>,
- <literal>EUC_KR</literal>,
- <literal>EUC_TW</literal>,
- <literal>ISO_8859_5</literal>,
- <literal>KOI8R</literal>,
- <literal>LATIN1</literal> to <literal>LATIN4</literal>,
- <literal>SJIS</literal>,
- <literal>WIN866</literal>,
- <literal>WIN1250</literal>,
- <literal>WIN1251</literal>
- </entry>
- </row>
<row>
<entry><literal>SJIS</literal></entry>
<entry><emphasis>not supported as a server encoding</emphasis>
<entry><emphasis>WIN866</emphasis>,
<literal>ISO_8859_5</literal>,
<literal>KOI8R</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN1251</literal>
</entry>
<entry><literal>WIN1250</literal></entry>
<entry><emphasis>WIN1250</emphasis>,
<literal>LATIN2</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<entry><emphasis>WIN1251</emphasis>,
<literal>ISO_8859_5</literal>,
<literal>KOI8R</literal>,
- <literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>
</entry>
<entry><literal>BIG5</literal></entry>
<entry><literal>EUC_TW</literal></entry>
</row>
- <row>
- <entry><literal>big5_to_mic</literal></entry>
- <entry><literal>BIG5</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>big5_to_utf8</literal></entry>
<entry><literal>BIG5</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>euc_cn_to_mic</literal></entry>
- <entry><literal>EUC_CN</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>euc_cn_to_utf8</literal></entry>
<entry><literal>EUC_CN</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>euc_jp_to_mic</literal></entry>
- <entry><literal>EUC_JP</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>euc_jp_to_sjis</literal></entry>
<entry><literal>EUC_JP</literal></entry>
<entry><literal>EUC_JP</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>euc_kr_to_mic</literal></entry>
- <entry><literal>EUC_KR</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>euc_kr_to_utf8</literal></entry>
<entry><literal>EUC_KR</literal></entry>
<entry><literal>EUC_TW</literal></entry>
<entry><literal>BIG5</literal></entry>
</row>
- <row>
- <entry><literal>euc_tw_to_mic</literal></entry>
- <entry><literal>EUC_TW</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>euc_tw_to_utf8</literal></entry>
<entry><literal>EUC_TW</literal></entry>
<entry><literal>LATIN10</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>iso_8859_1_to_mic</literal></entry>
- <entry><literal>LATIN1</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>iso_8859_1_to_utf8</literal></entry>
<entry><literal>LATIN1</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>iso_8859_2_to_mic</literal></entry>
- <entry><literal>LATIN2</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>iso_8859_2_to_utf8</literal></entry>
<entry><literal>LATIN2</literal></entry>
<entry><literal>LATIN2</literal></entry>
<entry><literal>WIN1250</literal></entry>
</row>
- <row>
- <entry><literal>iso_8859_3_to_mic</literal></entry>
- <entry><literal>LATIN3</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>iso_8859_3_to_utf8</literal></entry>
<entry><literal>LATIN3</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>iso_8859_4_to_mic</literal></entry>
- <entry><literal>LATIN4</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>iso_8859_4_to_utf8</literal></entry>
<entry><literal>LATIN4</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
- <row>
- <entry><literal>iso_8859_5_to_mic</literal></entry>
- <entry><literal>ISO_8859_5</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>iso_8859_5_to_utf8</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
<entry><literal>KOI8R</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
</row>
- <row>
- <entry><literal>koi8_r_to_mic</literal></entry>
- <entry><literal>KOI8R</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>koi8_r_to_utf8</literal></entry>
<entry><literal>KOI8R</literal></entry>
<entry><literal>KOI8U</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
- <row>
- <entry><literal>mic_to_big5</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>BIG5</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_euc_cn</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>EUC_CN</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_euc_jp</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>EUC_JP</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_euc_kr</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>EUC_KR</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_euc_tw</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>EUC_TW</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_iso_8859_1</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>LATIN1</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_iso_8859_2</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>LATIN2</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_iso_8859_3</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>LATIN3</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_iso_8859_4</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>LATIN4</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_iso_8859_5</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>ISO_8859_5</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_koi8_r</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>KOI8R</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_sjis</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>SJIS</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_windows_1250</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>WIN1250</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_windows_1251</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>WIN1251</literal></entry>
- </row>
- <row>
- <entry><literal>mic_to_windows_866</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- <entry><literal>WIN866</literal></entry>
- </row>
<row>
<entry><literal>sjis_to_euc_jp</literal></entry>
<entry><literal>SJIS</literal></entry>
<entry><literal>EUC_JP</literal></entry>
</row>
- <row>
- <entry><literal>sjis_to_mic</literal></entry>
- <entry><literal>SJIS</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>sjis_to_utf8</literal></entry>
<entry><literal>SJIS</literal></entry>
<entry><literal>WIN1250</literal></entry>
<entry><literal>LATIN2</literal></entry>
</row>
- <row>
- <entry><literal>windows_1250_to_mic</literal></entry>
- <entry><literal>WIN1250</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>windows_1250_to_utf8</literal></entry>
<entry><literal>WIN1250</literal></entry>
<entry><literal>WIN1251</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
- <row>
- <entry><literal>windows_1251_to_mic</literal></entry>
- <entry><literal>WIN1251</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>windows_1251_to_utf8</literal></entry>
<entry><literal>WIN1251</literal></entry>
<entry><literal>WIN866</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
- <row>
- <entry><literal>windows_866_to_mic</literal></entry>
- <entry><literal>WIN866</literal></entry>
- <entry><literal>MULE_INTERNAL</literal></entry>
- </row>
<row>
<entry><literal>windows_866_to_utf8</literal></entry>
<entry><literal>WIN866</literal></entry>
return l - start;
}
-/*
- * LATINn ---> MIC when the charset's local codes map directly to MIC
- *
- * l points to the source string of length len
- * p is the output area (must be large enough!)
- * lc is the mule character set id for the local encoding
- * encoding is the PG identifier for the local encoding
- *
- * Returns the number of input bytes consumed. If noError is true, this can
- * be less than 'len'.
- */
-int
-latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding, bool noError)
-{
- const unsigned char *start = l;
- int c1;
-
- while (len > 0)
- {
- c1 = *l;
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(encoding, (const char *) l, len);
- }
- if (IS_HIGHBIT_SET(c1))
- *p++ = lc;
- *p++ = c1;
- l++;
- len--;
- }
- *p = '\0';
-
- return l - start;
-}
-
-/*
- * MIC ---> LATINn when the charset's local codes map directly to MIC
- *
- * mic points to the source string of length len
- * p is the output area (must be large enough!)
- * lc is the mule character set id for the local encoding
- * encoding is the PG identifier for the local encoding
- *
- * Returns the number of input bytes consumed. If noError is true, this can
- * be less than 'len'.
- */
-int
-mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding, bool noError)
-{
- const unsigned char *start = mic;
- int c1;
-
- while (len > 0)
- {
- c1 = *mic;
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
- }
- if (!IS_HIGHBIT_SET(c1))
- {
- /* easy for ASCII */
- *p++ = c1;
- mic++;
- len--;
- }
- else
- {
- int l = pg_mule_mblen(mic);
-
- if (len < l)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
- len);
- }
- if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, encoding,
- (const char *) mic, len);
- }
- *p++ = mic[1];
- mic += 2;
- len -= 2;
- }
- }
- *p = '\0';
-
- return mic - start;
-}
-
-
-/*
- * latin2mic_with_table: a generic single byte charset encoding
- * conversion from a local charset to the mule internal code.
- *
- * l points to the source string of length len
- * p is the output area (must be large enough!)
- * lc is the mule character set id for the local encoding
- * encoding is the PG identifier for the local encoding
- * tab holds conversion entries for the local charset
- * starting from 128 (0x80). each entry in the table holds the corresponding
- * code point for the mule encoding, or 0 if there is no equivalent code.
- *
- * Returns the number of input bytes consumed. If noError is true, this can
- * be less than 'len'.
- */
-int
-latin2mic_with_table(const unsigned char *l,
- unsigned char *p,
- int len,
- int lc,
- int encoding,
- const unsigned char *tab,
- bool noError)
-{
- const unsigned char *start = l;
- unsigned char c1,
- c2;
-
- while (len > 0)
- {
- c1 = *l;
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(encoding, (const char *) l, len);
- }
- if (!IS_HIGHBIT_SET(c1))
- *p++ = c1;
- else
- {
- c2 = tab[c1 - HIGHBIT];
- if (c2)
- {
- *p++ = lc;
- *p++ = c2;
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(encoding, PG_MULE_INTERNAL,
- (const char *) l, len);
- }
- }
- l++;
- len--;
- }
- *p = '\0';
-
- return l - start;
-}
-
-/*
- * mic2latin_with_table: a generic single byte charset encoding
- * conversion from the mule internal code to a local charset.
- *
- * mic points to the source string of length len
- * p is the output area (must be large enough!)
- * lc is the mule character set id for the local encoding
- * encoding is the PG identifier for the local encoding
- * tab holds conversion entries for the mule internal code's second byte,
- * starting from 128 (0x80). each entry in the table holds the corresponding
- * code point for the local charset, or 0 if there is no equivalent code.
- *
- * Returns the number of input bytes consumed. If noError is true, this can
- * be less than 'len'.
- */
-int
-mic2latin_with_table(const unsigned char *mic,
- unsigned char *p,
- int len,
- int lc,
- int encoding,
- const unsigned char *tab,
- bool noError)
-{
- const unsigned char *start = mic;
- unsigned char c1,
- c2;
-
- while (len > 0)
- {
- c1 = *mic;
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
- }
- if (!IS_HIGHBIT_SET(c1))
- {
- /* easy for ASCII */
- *p++ = c1;
- mic++;
- len--;
- }
- else
- {
- int l = pg_mule_mblen(mic);
-
- if (len < l)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
- len);
- }
- if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
- (c2 = tab[mic[1] - HIGHBIT]) == 0)
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, encoding,
- (const char *) mic, len);
- break; /* keep compiler quiet */
- }
- *p++ = c2;
- mic += 2;
- len -= 2;
- }
- }
- *p = '\0';
-
- return mic - start;
-}
-
/*
* comparison routine for bsearch()
* this routine is intended for combined UTF8 -> local code
include $(top_builddir)/src/Makefile.global
SUBDIRS = \
- cyrillic_and_mic euc_cn_and_mic euc_jp_and_sjis \
- euc_kr_and_mic euc_tw_and_big5 latin2_and_win1250 latin_and_mic \
+ cyrillic euc_jp_and_sjis \
+ euc_tw_and_big5 latin2_and_win1250 \
utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \
utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \
#-------------------------------------------------------------------------
#
-# src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile
+# src/backend/utils/mb/conversion_procs/cyrillic/Makefile
#
#-------------------------------------------------------------------------
-subdir = src/backend/utils/mb/conversion_procs/latin_and_mic
+subdir = src/backend/utils/mb/conversion_procs/cyrillic
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
-NAME = latin_and_mic
-PGFILEDESC = "latin <-> mic text conversions"
+NAME = cyrillic
+PGFILEDESC = "cyrillic single-byte conversions"
include $(srcdir)/../proc.mk
/*-------------------------------------------------------------------------
*
- * Cyrillic and MULE_INTERNAL
+ * KOI8R, WIN1251, WIN866 and ISO_8859_5
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
+ * src/backend/utils/mb/conversion_procs/cyrillic/cyrillic.c
*
*-------------------------------------------------------------------------
*/
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC_EXT(
- .name = "cyrillic_and_mic",
+ .name = "cyrillic",
.version = PG_VERSION
);
-PG_FUNCTION_INFO_V1(koi8r_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_koi8r);
-PG_FUNCTION_INFO_V1(iso_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_iso);
-PG_FUNCTION_INFO_V1(win1251_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_win1251);
-PG_FUNCTION_INFO_V1(win866_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_win866);
PG_FUNCTION_INFO_V1(koi8r_to_win1251);
PG_FUNCTION_INFO_V1(win1251_to_koi8r);
PG_FUNCTION_INFO_V1(koi8r_to_win866);
* Cyrillic support
* currently supported Cyrillic encodings:
*
- * KOI8-R (this is also the charset for the mule internal code for Cyrillic)
+ * KOI8-R
* ISO-8859-5
* Microsoft's CP1251 (windows-1251)
* Alternativny Variant (MS-DOS CP866)
};
-Datum
-koi8r_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL);
-
- converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_koi8r(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R);
-
- converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-iso_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL);
-
- converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_iso(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5);
-
- converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-win1251_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL);
-
- converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_win1251(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251);
-
- converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-win866_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL);
-
- converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_win866(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866);
-
- converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError);
-
- PG_RETURN_INT32(converted);
-}
-
Datum
koi8r_to_win1251(PG_FUNCTION_ARGS)
{
+++ /dev/null
-#-------------------------------------------------------------------------
-#
-# src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile
-#
-#-------------------------------------------------------------------------
-subdir = src/backend/utils/mb/conversion_procs/cyrillic_and_mic
-top_builddir = ../../../../../..
-include $(top_builddir)/src/Makefile.global
-
-NAME = cyrillic_and_mic
-PGFILEDESC = "cyrillic <-> mic text conversions"
-
-include $(srcdir)/../proc.mk
+++ /dev/null
-#-------------------------------------------------------------------------
-#
-# src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile
-#
-#-------------------------------------------------------------------------
-subdir = src/backend/utils/mb/conversion_procs/euc_cn_and_mic
-top_builddir = ../../../../../..
-include $(top_builddir)/src/Makefile.global
-
-NAME = euc_cn_and_mic
-PGFILEDESC = "euc_cn <-> mic text conversions"
-
-include $(srcdir)/../proc.mk
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * EUC_CN and MULE_INTERNAL
- *
- * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- * src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "mb/pg_wchar.h"
-
-PG_MODULE_MAGIC_EXT(
- .name = "euc_cn_and_mic",
- .version = PG_VERSION
-);
-
-PG_FUNCTION_INFO_V1(euc_cn_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_euc_cn);
-
-/* ----------
- * conv_proc(
- * INTEGER, -- source encoding id
- * INTEGER, -- destination encoding id
- * CSTRING, -- source string (null terminated C string)
- * CSTRING, -- destination string (null terminated C string)
- * INTEGER, -- source string length
- * BOOL -- if true, don't throw an error if conversion fails
- * ) returns INTEGER;
- *
- * Returns the number of bytes successfully converted.
- * ----------
- */
-
-static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
-static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
-
-Datum
-euc_cn_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
-
- converted = euc_cn2mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_euc_cn(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
-
- converted = mic2euc_cn(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-/*
- * EUC_CN ---> MIC
- */
-static int
-euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = euc;
- int c1;
-
- while (len > 0)
- {
- c1 = *euc;
- if (IS_HIGHBIT_SET(c1))
- {
- if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
- }
- *p++ = LC_GB2312_80;
- *p++ = c1;
- *p++ = euc[1];
- euc += 2;
- len -= 2;
- }
- else
- { /* should be ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
- }
- *p++ = c1;
- euc++;
- len--;
- }
- }
- *p = '\0';
-
- return euc - start;
-}
-
-/*
- * MIC ---> EUC_CN
- */
-static int
-mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- int c1;
-
- while (len > 0)
- {
- c1 = *mic;
- if (IS_HIGHBIT_SET(c1))
- {
- if (c1 != LC_GB2312_80)
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
- (const char *) mic, len);
- }
- if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- mic++;
- *p++ = *mic++;
- *p++ = *mic++;
- len -= 3;
- }
- else
- { /* should be ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- }
- }
- *p = '\0';
-
- return mic - start;
-}
/*-------------------------------------------------------------------------
*
- * EUC_JP, SJIS and MULE_INTERNAL
+ * EUC_JP and SJIS
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
-PG_FUNCTION_INFO_V1(euc_jp_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_euc_jp);
-PG_FUNCTION_INFO_V1(sjis_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_sjis);
/* ----------
* conv_proc(
* ----------
*/
-static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
-static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
-static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
-static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError);
PG_RETURN_INT32(converted);
}
-Datum
-euc_jp_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
-
- converted = euc_jp2mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_euc_jp(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
-
- converted = mic2euc_jp(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-sjis_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
-
- converted = sjis2mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_sjis(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
-
- converted = mic2sjis(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-/*
- * SJIS ---> MIC
- */
-static int
-sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = sjis;
- int c1,
- c2,
- i,
- k,
- k2;
-
- while (len > 0)
- {
- c1 = *sjis;
- if (c1 >= 0xa1 && c1 <= 0xdf)
- {
- /* JIS X0201 (1 byte kana) */
- *p++ = LC_JISX0201K;
- *p++ = c1;
- sjis++;
- len--;
- }
- else if (IS_HIGHBIT_SET(c1))
- {
- /*
- * JIS X0208, X0212, user defined extended characters
- */
- if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
- {
- if (noError)
- break;
- report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
- }
- c2 = sjis[1];
- k = (c1 << 8) + c2;
- if (k >= 0xed40 && k < 0xf040)
- {
- /* NEC selection IBM kanji */
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].nec;
- if (k2 == 0xffff)
- break;
- if (k2 == k)
- {
- k = ibmkanji[i].sjis;
- c1 = (k >> 8) & 0xff;
- c2 = k & 0xff;
- }
- }
- }
-
- if (k < 0xeb3f)
- {
- /* JIS X0208 */
- *p++ = LC_JISX0208;
- *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
- else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
- {
- /* NEC selection IBM kanji - Other undecided justice */
- *p++ = LC_JISX0208;
- *p++ = PGEUCALTCODE >> 8;
- *p++ = PGEUCALTCODE & 0xff;
- }
- else if (k >= 0xf040 && k < 0xf540)
- {
- /*
- * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
- * 0x7e7e EUC 0xf5a1 - 0xfefe
- */
- *p++ = LC_JISX0208;
- c1 -= 0x6f;
- *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
- else if (k >= 0xf540 && k < 0xfa40)
- {
- /*
- * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
- * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
- */
- *p++ = LC_JISX0212;
- c1 -= 0x74;
- *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
- else if (k >= 0xfa40)
- {
- /*
- * mapping IBM kanji to X0208 and X0212
- */
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].sjis;
- if (k2 == 0xffff)
- break;
- if (k2 == k)
- {
- k = ibmkanji[i].euc;
- if (k >= 0x8f0000)
- {
- *p++ = LC_JISX0212;
- *p++ = 0x80 | ((k & 0xff00) >> 8);
- *p++ = 0x80 | (k & 0xff);
- }
- else
- {
- *p++ = LC_JISX0208;
- *p++ = 0x80 | (k >> 8);
- *p++ = 0x80 | (k & 0xff);
- }
- }
- }
- }
- sjis += 2;
- len -= 2;
- }
- else
- { /* should be ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
- }
- *p++ = c1;
- sjis++;
- len--;
- }
- }
- *p = '\0';
-
- return sjis - start;
-}
-
-/*
- * MIC ---> SJIS
- */
-static int
-mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- int c1,
- c2,
- k,
- l;
-
- while (len > 0)
- {
- c1 = *mic;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- if (c1 == LC_JISX0201K)
- *p++ = mic[1];
- else if (c1 == LC_JISX0208)
- {
- c1 = mic[1];
- c2 = mic[2];
- k = (c1 << 8) | (c2 & 0xff);
- if (k >= 0xf5a1)
- {
- /* UDC1 */
- c1 -= 0x54;
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
- }
- else
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
- *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
- }
- else if (c1 == LC_JISX0212)
- {
- int i,
- k2;
-
- c1 = mic[1];
- c2 = mic[2];
- k = c1 << 8 | c2;
- if (k >= 0xf5a1)
- {
- /* UDC2 */
- c1 -= 0x54;
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
- *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
- }
- else
- {
- /* IBM kanji */
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].euc & 0xffff;
- if (k2 == 0xffff)
- {
- *p++ = PGSJISALTCODE >> 8;
- *p++ = PGSJISALTCODE & 0xff;
- break;
- }
- if (k2 == k)
- {
- k = ibmkanji[i].sjis;
- *p++ = k >> 8;
- *p++ = k & 0xff;
- break;
- }
- }
- }
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
- (const char *) mic, len);
- }
- mic += l;
- len -= l;
- }
- *p = '\0';
-
- return mic - start;
-}
-
-/*
- * EUC_JP ---> MIC
- */
-static int
-euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = euc;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *euc;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_JP,
- (const char *) euc, len);
- }
- *p++ = c1;
- euc++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_JP,
- (const char *) euc, len);
- }
- if (c1 == SS2)
- { /* 1 byte kana? */
- *p++ = LC_JISX0201K;
- *p++ = euc[1];
- }
- else if (c1 == SS3)
- { /* JIS X0212 kanji? */
- *p++ = LC_JISX0212;
- *p++ = euc[1];
- *p++ = euc[2];
- }
- else
- { /* kanji? */
- *p++ = LC_JISX0208;
- *p++ = c1;
- *p++ = euc[1];
- }
- euc += l;
- len -= l;
- }
- *p = '\0';
-
- return euc - start;
-}
-
-/*
- * MIC ---> EUC_JP
- */
-static int
-mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *mic;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- if (c1 == LC_JISX0201K)
- {
- *p++ = SS2;
- *p++ = mic[1];
- }
- else if (c1 == LC_JISX0212)
- {
- *p++ = SS3;
- *p++ = mic[1];
- *p++ = mic[2];
- }
- else if (c1 == LC_JISX0208)
- {
- *p++ = mic[1];
- *p++ = mic[2];
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
- (const char *) mic, len);
- }
- mic += l;
- len -= l;
- }
- *p = '\0';
-
- return mic - start;
-}
-
/*
* EUC_JP -> SJIS
*/
+++ /dev/null
-#-------------------------------------------------------------------------
-#
-# src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile
-#
-#-------------------------------------------------------------------------
-subdir = src/backend/utils/mb/conversion_procs/euc_kr_and_mic
-top_builddir = ../../../../../..
-include $(top_builddir)/src/Makefile.global
-
-NAME = euc_kr_and_mic
-PGFILEDESC = "euc_kr <-> mic text conversions"
-
-include $(srcdir)/../proc.mk
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * EUC_KR and MULE_INTERNAL
- *
- * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- * src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "mb/pg_wchar.h"
-
-PG_MODULE_MAGIC_EXT(
- .name = "euc_kr_and_mic",
- .version = PG_VERSION
-);
-
-PG_FUNCTION_INFO_V1(euc_kr_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_euc_kr);
-
-/* ----------
- * conv_proc(
- * INTEGER, -- source encoding id
- * INTEGER, -- destination encoding id
- * CSTRING, -- source string (null terminated C string)
- * CSTRING, -- destination string (null terminated C string)
- * INTEGER, -- source string length
- * BOOL -- if true, don't throw an error if conversion fails
- * ) returns INTEGER;
- *
- * Returns the number of bytes successfully converted.
- * ----------
- */
-
-static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
-static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
-
-Datum
-euc_kr_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);
-
- converted = euc_kr2mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_euc_kr(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);
-
- converted = mic2euc_kr(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-/*
- * EUC_KR ---> MIC
- */
-static int
-euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = euc;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *euc;
- if (IS_HIGHBIT_SET(c1))
- {
- l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
- if (l != 2)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_KR,
- (const char *) euc, len);
- }
- *p++ = LC_KS5601;
- *p++ = c1;
- *p++ = euc[1];
- euc += 2;
- len -= 2;
- }
- else
- { /* should be ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_KR,
- (const char *) euc, len);
- }
- *p++ = c1;
- euc++;
- len--;
- }
- }
- *p = '\0';
-
- return euc - start;
-}
-
-/*
- * MIC ---> EUC_KR
- */
-static int
-mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *mic;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- if (c1 == LC_KS5601)
- {
- *p++ = mic[1];
- *p++ = mic[2];
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
- (const char *) mic, len);
- }
- mic += l;
- len -= l;
- }
- *p = '\0';
-
- return mic - start;
-}
/*
- * conversion between BIG5 and Mule Internal Code(CNS 116643-1992
- * plane 1 and plane 2).
+ * BIG5 support functions (CNS 11643-1992 plane 1 and plane 2).
* This program is partially copied from lv(Multilingual file viewer)
* and slightly modified. lv is written and copyrighted by NARITA Tomio
* (nrt@web.ad.jp).
/*-------------------------------------------------------------------------
*
- * EUC_TW, BIG5 and MULE_INTERNAL
+ * EUC_TW and BIG5
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
PG_FUNCTION_INFO_V1(euc_tw_to_big5);
PG_FUNCTION_INFO_V1(big5_to_euc_tw);
-PG_FUNCTION_INFO_V1(euc_tw_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_euc_tw);
-PG_FUNCTION_INFO_V1(big5_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_big5);
/* ----------
* conv_proc(
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
-static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
-static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
-static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
-static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_tw_to_big5(PG_FUNCTION_ARGS)
PG_RETURN_INT32(converted);
}
-Datum
-euc_tw_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
-
- converted = euc_tw2mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_euc_tw(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
-
- converted = mic2euc_tw(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-big5_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
-
- converted = big52mic(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_big5(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
-
- converted = mic2big5(src, dest, len, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-
/*
* EUC_TW ---> Big5
*/
static int
euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
return big5 - start;
}
-
-/*
- * EUC_TW ---> MIC
- */
-static int
-euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = euc;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *euc;
- if (IS_HIGHBIT_SET(c1))
- {
- l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_TW,
- (const char *) euc, len);
- }
- if (c1 == SS2)
- {
- c1 = euc[1]; /* plane No. */
- if (c1 == 0xa1)
- *p++ = LC_CNS11643_1;
- else if (c1 == 0xa2)
- *p++ = LC_CNS11643_2;
- else
- {
- /* other planes are MULE private charsets */
- *p++ = LCPRV2_B;
- *p++ = c1 - 0xa3 + LC_CNS11643_3;
- }
- *p++ = euc[2];
- *p++ = euc[3];
- }
- else
- { /* CNS11643-1 */
- *p++ = LC_CNS11643_1;
- *p++ = c1;
- *p++ = euc[1];
- }
- euc += l;
- len -= l;
- }
- else
- { /* should be ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_EUC_TW,
- (const char *) euc, len);
- }
- *p++ = c1;
- euc++;
- len--;
- }
- }
- *p = '\0';
-
- return euc - start;
-}
-
-/*
- * MIC ---> EUC_TW
- */
-static int
-mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- int c1;
- int l;
-
- while (len > 0)
- {
- c1 = *mic;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- if (c1 == LC_CNS11643_1)
- {
- *p++ = mic[1];
- *p++ = mic[2];
- }
- else if (c1 == LC_CNS11643_2)
- {
- *p++ = SS2;
- *p++ = 0xa2;
- *p++ = mic[1];
- *p++ = mic[2];
- }
- else if (c1 == LCPRV2_B &&
- mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
- {
- *p++ = SS2;
- *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
- *p++ = mic[2];
- *p++ = mic[3];
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
- (const char *) mic, len);
- }
- mic += l;
- len -= l;
- }
- *p = '\0';
-
- return mic - start;
-}
-
-/*
- * Big5 ---> MIC
- */
-static int
-big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = big5;
- unsigned short c1;
- unsigned short big5buf,
- cnsBuf;
- unsigned char lc;
- int l;
-
- while (len > 0)
- {
- c1 = *big5;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_BIG5,
- (const char *) big5, len);
- }
- *p++ = c1;
- big5++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_BIG5,
- (const char *) big5, len);
- }
- big5buf = (c1 << 8) | big5[1];
- cnsBuf = BIG5toCNS(big5buf, &lc);
- if (lc != 0)
- {
- /* Planes 3 and 4 are MULE private charsets */
- if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
- *p++ = LCPRV2_B;
- *p++ = lc; /* Plane No. */
- *p++ = (cnsBuf >> 8) & 0x00ff;
- *p++ = cnsBuf & 0x00ff;
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
- (const char *) big5, len);
- }
- big5 += l;
- len -= l;
- }
- *p = '\0';
-
- return big5 - start;
-}
-
-/*
- * MIC ---> Big5
- */
-static int
-mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
-{
- const unsigned char *start = mic;
- unsigned short c1;
- unsigned short big5buf,
- cnsBuf;
- int l;
-
- while (len > 0)
- {
- c1 = *mic;
- if (!IS_HIGHBIT_SET(c1))
- {
- /* ASCII */
- if (c1 == 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- *p++ = c1;
- mic++;
- len--;
- continue;
- }
- l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
- if (l < 0)
- {
- if (noError)
- break;
- report_invalid_encoding(PG_MULE_INTERNAL,
- (const char *) mic, len);
- }
- if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
- {
- if (c1 == LCPRV2_B)
- {
- c1 = mic[1]; /* get plane no. */
- cnsBuf = (mic[2] << 8) | mic[3];
- }
- else
- {
- cnsBuf = (mic[1] << 8) | mic[2];
- }
- big5buf = CNStoBIG5(cnsBuf, c1);
- if (big5buf == 0)
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
- (const char *) mic, len);
- }
- *p++ = (big5buf >> 8) & 0x00ff;
- *p++ = big5buf & 0x00ff;
- }
- else
- {
- if (noError)
- break;
- report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
- (const char *) mic, len);
- }
- mic += l;
- len -= l;
- }
- *p = '\0';
-
- return mic - start;
-}
.version = PG_VERSION
);
-PG_FUNCTION_INFO_V1(latin2_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_latin2);
-PG_FUNCTION_INFO_V1(win1250_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_win1250);
PG_FUNCTION_INFO_V1(latin2_to_win1250);
PG_FUNCTION_INFO_V1(win1250_to_latin2);
};
-Datum
-latin2_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);
-
- converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_latin2(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);
-
- converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-win1250_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);
-
- converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- win1250_2_iso88592, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_win1250(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);
-
- converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- iso88592_2_win1250, noError);
-
- PG_RETURN_INT32(converted);
-}
-
Datum
latin2_to_win1250(PG_FUNCTION_ARGS)
{
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * LATINn and MULE_INTERNAL
- *
- * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- * src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "mb/pg_wchar.h"
-
-PG_MODULE_MAGIC_EXT(
- .name = "latin_and_mic",
- .version = PG_VERSION
-);
-
-PG_FUNCTION_INFO_V1(latin1_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_latin1);
-PG_FUNCTION_INFO_V1(latin3_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_latin3);
-PG_FUNCTION_INFO_V1(latin4_to_mic);
-PG_FUNCTION_INFO_V1(mic_to_latin4);
-
-/* ----------
- * conv_proc(
- * INTEGER, -- source encoding id
- * INTEGER, -- destination encoding id
- * CSTRING, -- source string (null terminated C string)
- * CSTRING, -- destination string (null terminated C string)
- * INTEGER, -- source string length
- * BOOL -- if true, don't throw an error if conversion fails
- * ) returns INTEGER;
- *
- * Returns the number of bytes successfully converted.
- * ----------
- */
-
-
-Datum
-latin1_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);
-
- converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_latin1(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);
-
- converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-latin3_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);
-
- converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_latin3(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);
-
- converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-latin4_to_mic(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);
-
- converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
-
- PG_RETURN_INT32(converted);
-}
-
-Datum
-mic_to_latin4(PG_FUNCTION_ARGS)
-{
- unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
- unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
- int len = PG_GETARG_INT32(4);
- bool noError = PG_GETARG_BOOL(5);
- int converted;
-
- CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);
-
- converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
-
- PG_RETURN_INT32(converted);
-}
# Copyright (c) 2022-2026, PostgreSQL Global Development Group
encodings = {
- 'cyrillic_and_mic': ['cyrillic_and_mic/cyrillic_and_mic.c'],
+ 'cyrillic': ['cyrillic/cyrillic.c'],
'euc2004_sjis2004': ['euc2004_sjis2004/euc2004_sjis2004.c'],
- 'euc_cn_and_mic': ['euc_cn_and_mic/euc_cn_and_mic.c'],
'euc_jp_and_sjis': ['euc_jp_and_sjis/euc_jp_and_sjis.c'],
- 'euc_kr_and_mic': ['euc_kr_and_mic/euc_kr_and_mic.c'],
'euc_tw_and_big5': [
'euc_tw_and_big5/euc_tw_and_big5.c',
'euc_tw_and_big5/big5.c',
],
'latin2_and_win1250': ['latin2_and_win1250/latin2_and_win1250.c'],
- 'latin_and_mic': ['latin_and_mic/latin_and_mic.c'],
'utf8_and_big5': ['utf8_and_big5/utf8_and_big5.c'],
'utf8_and_cyrillic': ['utf8_and_cyrillic/utf8_and_cyrillic.c'],
'utf8_and_euc2004': ['utf8_and_euc2004/utf8_and_euc2004.c'],
#ifdef ENABLE_NLS
/*
* Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
- * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
- * fail for gettext-internal causes like out-of-memory.
+ * codeset. Can fail for gettext-internal causes like out-of-memory.
*/
static bool
raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
/*
* gettext() returns messages in this encoding. This often matches the
- * database encoding, but it differs for SQL_ASCII databases, for processes
- * not attached to a database, and under a database encoding lacking iconv
- * support (MULE_INTERNAL).
+ * database encoding, but it differs for SQL_ASCII databases and for processes
+ * not attached to a database.
*/
int
GetMessageEncoding(void)
{
"mskanji", PG_SJIS
}, /* alias for Shift_JIS */
- {
- "muleinternal", PG_MULE_INTERNAL
- },
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
[PG_EUC_TW] = DEF_ENC2NAME(EUC_TW, 0),
[PG_EUC_JIS_2004] = DEF_ENC2NAME(EUC_JIS_2004, 20932),
[PG_UTF8] = DEF_ENC2NAME(UTF8, 65001),
- [PG_MULE_INTERNAL] = DEF_ENC2NAME(MULE_INTERNAL, 0),
[PG_LATIN1] = DEF_ENC2NAME(LATIN1, 28591),
[PG_LATIN2] = DEF_ENC2NAME(LATIN2, 28592),
[PG_LATIN3] = DEF_ENC2NAME(LATIN3, 28593),
/* ----------
* These are encoding names for gettext.
- *
- * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
* ----------
*/
const char *pg_enc2gettext_tbl[] =
{
[PG_SQL_ASCII] = "US-ASCII",
[PG_UTF8] = "UTF-8",
- [PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "LATIN1",
[PG_LATIN2] = "LATIN2",
[PG_LATIN3] = "LATIN3",
[PG_EUC_TW] = "EUC-TW",
[PG_EUC_JIS_2004] = NULL,
[PG_UTF8] = "UTF-8",
- [PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "ISO-8859-1",
[PG_LATIN2] = "ISO-8859-2",
[PG_LATIN3] = "ISO-8859-3",
return ucs_wcwidth(utf8_to_unicode(s));
}
-/*
- * convert mule internal code to pg_wchar
- * caller should allocate enough space for "to"
- * len: length of from.
- * "from" not necessarily null terminated.
- */
-static int
-pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
-{
- int cnt = 0;
-
- while (len > 0 && *from)
- {
- if (IS_LC1(*from))
- {
- MB2CHAR_NEED_AT_LEAST(len, 2);
- *to = *from++ << 16;
- *to |= *from++;
- len -= 2;
- }
- else if (IS_LCPRV1(*from))
- {
- MB2CHAR_NEED_AT_LEAST(len, 3);
- from++;
- *to = *from++ << 16;
- *to |= *from++;
- len -= 3;
- }
- else if (IS_LC2(*from))
- {
- MB2CHAR_NEED_AT_LEAST(len, 3);
- *to = *from++ << 16;
- *to |= *from++ << 8;
- *to |= *from++;
- len -= 3;
- }
- else if (IS_LCPRV2(*from))
- {
- MB2CHAR_NEED_AT_LEAST(len, 4);
- from++;
- *to = *from++ << 16;
- *to |= *from++ << 8;
- *to |= *from++;
- len -= 4;
- }
- else
- { /* assume ASCII */
- *to = (unsigned char) *from++;
- len--;
- }
- to++;
- cnt++;
- }
- *to = 0;
- return cnt;
-}
-
-/*
- * convert pg_wchar to mule internal code
- * caller should allocate enough space for "to"
- * len: length of from.
- * "from" not necessarily null terminated.
- */
-static int
-pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
-{
- int cnt = 0;
-
- while (len > 0 && *from)
- {
- unsigned char lb;
-
- lb = (*from >> 16) & 0xff;
- if (IS_LC1(lb))
- {
- *to++ = lb;
- *to++ = *from & 0xff;
- cnt += 2;
- }
- else if (IS_LC2(lb))
- {
- *to++ = lb;
- *to++ = (*from >> 8) & 0xff;
- *to++ = *from & 0xff;
- cnt += 3;
- }
- else if (IS_LCPRV1_A_RANGE(lb))
- {
- *to++ = LCPRV1_A;
- *to++ = lb;
- *to++ = *from & 0xff;
- cnt += 3;
- }
- else if (IS_LCPRV1_B_RANGE(lb))
- {
- *to++ = LCPRV1_B;
- *to++ = lb;
- *to++ = *from & 0xff;
- cnt += 3;
- }
- else if (IS_LCPRV2_A_RANGE(lb))
- {
- *to++ = LCPRV2_A;
- *to++ = lb;
- *to++ = (*from >> 8) & 0xff;
- *to++ = *from & 0xff;
- cnt += 4;
- }
- else if (IS_LCPRV2_B_RANGE(lb))
- {
- *to++ = LCPRV2_B;
- *to++ = lb;
- *to++ = (*from >> 8) & 0xff;
- *to++ = *from & 0xff;
- cnt += 4;
- }
- else
- {
- *to++ = *from & 0xff;
- cnt += 1;
- }
- from++;
- len--;
- }
- *to = 0;
- return cnt;
-}
-
-/* exported for direct use by conv.c */
-int
-pg_mule_mblen(const unsigned char *s)
-{
- int len;
-
- if (IS_LC1(*s))
- len = 2;
- else if (IS_LCPRV1(*s))
- len = 3;
- else if (IS_LC2(*s))
- len = 3;
- else if (IS_LCPRV2(*s))
- len = 4;
- else
- len = 1; /* assume ASCII */
- return len;
-}
-
-static int
-pg_mule_dsplen(const unsigned char *s)
-{
- int len;
-
- /*
- * Note: it's not really appropriate to assume that all multibyte charsets
- * are double-wide on screen. But this seems an okay approximation for
- * the MULE charsets we currently support.
- */
-
- if (IS_LC1(*s))
- len = 1;
- else if (IS_LCPRV1(*s))
- len = 1;
- else if (IS_LC2(*s))
- len = 2;
- else if (IS_LCPRV2(*s))
- len = 2;
- else
- len = 1; /* assume ASCII */
-
- return len;
-}
-
/*
* ISO8859-1
*/
return s - start;
}
-static int
-pg_mule_verifychar(const unsigned char *s, int len)
-{
- int l,
- mbl;
- unsigned char c;
-
- l = mbl = pg_mule_mblen(s);
-
- if (len < l)
- return -1;
-
- while (--l > 0)
- {
- c = *++s;
- if (!IS_HIGHBIT_SET(c))
- return -1;
- }
- return mbl;
-}
-
-static int
-pg_mule_verifystr(const unsigned char *s, int len)
-{
- const unsigned char *start = s;
-
- while (len > 0)
- {
- int l;
-
- /* fast path for ASCII-subset characters */
- if (!IS_HIGHBIT_SET(*s))
- {
- if (*s == '\0')
- break;
- l = 1;
- }
- else
- {
- l = pg_mule_verifychar(s, len);
- if (l == -1)
- break;
- }
- s += l;
- len -= l;
- }
-
- return s - start;
-}
-
static int
pg_latin1_verifychar(const unsigned char *s, int len)
{
[PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},
[PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
[PG_UTF8] = {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4},
- [PG_MULE_INTERNAL] = {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4},
[PG_LATIN1] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},
[PG_LATIN2] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},
[PG_LATIN3] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},
[
-{ oid => '4402', descr => 'conversion for KOI8R to MULE_INTERNAL',
- conname => 'koi8_r_to_mic', conforencoding => 'PG_KOI8R',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'koi8r_to_mic' },
-{ oid => '4403', descr => 'conversion for MULE_INTERNAL to KOI8R',
- conname => 'mic_to_koi8_r', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_KOI8R', conproc => 'mic_to_koi8r' },
-{ oid => '4404', descr => 'conversion for ISO-8859-5 to MULE_INTERNAL',
- conname => 'iso_8859_5_to_mic', conforencoding => 'PG_ISO_8859_5',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'iso_to_mic' },
-{ oid => '4405', descr => 'conversion for MULE_INTERNAL to ISO-8859-5',
- conname => 'mic_to_iso_8859_5', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_ISO_8859_5', conproc => 'mic_to_iso' },
-{ oid => '4406', descr => 'conversion for WIN1251 to MULE_INTERNAL',
- conname => 'windows_1251_to_mic', conforencoding => 'PG_WIN1251',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'win1251_to_mic' },
-{ oid => '4407', descr => 'conversion for MULE_INTERNAL to WIN1251',
- conname => 'mic_to_windows_1251', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_WIN1251', conproc => 'mic_to_win1251' },
-{ oid => '4408', descr => 'conversion for WIN866 to MULE_INTERNAL',
- conname => 'windows_866_to_mic', conforencoding => 'PG_WIN866',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'win866_to_mic' },
-{ oid => '4409', descr => 'conversion for MULE_INTERNAL to WIN866',
- conname => 'mic_to_windows_866', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_WIN866', conproc => 'mic_to_win866' },
{ oid => '4410', descr => 'conversion for KOI8R to WIN1251',
conname => 'koi8_r_to_windows_1251', conforencoding => 'PG_KOI8R',
contoencoding => 'PG_WIN1251', conproc => 'koi8r_to_win1251' },
{ oid => '4421', descr => 'conversion for WIN866 to ISO-8859-5',
conname => 'windows_866_to_iso_8859_5', conforencoding => 'PG_WIN866',
contoencoding => 'PG_ISO_8859_5', conproc => 'win866_to_iso' },
-{ oid => '4422', descr => 'conversion for EUC_CN to MULE_INTERNAL',
- conname => 'euc_cn_to_mic', conforencoding => 'PG_EUC_CN',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_cn_to_mic' },
-{ oid => '4423', descr => 'conversion for MULE_INTERNAL to EUC_CN',
- conname => 'mic_to_euc_cn', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_EUC_CN', conproc => 'mic_to_euc_cn' },
{ oid => '4424', descr => 'conversion for EUC_JP to SJIS',
conname => 'euc_jp_to_sjis', conforencoding => 'PG_EUC_JP',
contoencoding => 'PG_SJIS', conproc => 'euc_jp_to_sjis' },
{ oid => '4425', descr => 'conversion for SJIS to EUC_JP',
conname => 'sjis_to_euc_jp', conforencoding => 'PG_SJIS',
contoencoding => 'PG_EUC_JP', conproc => 'sjis_to_euc_jp' },
-{ oid => '4426', descr => 'conversion for EUC_JP to MULE_INTERNAL',
- conname => 'euc_jp_to_mic', conforencoding => 'PG_EUC_JP',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_jp_to_mic' },
-{ oid => '4427', descr => 'conversion for SJIS to MULE_INTERNAL',
- conname => 'sjis_to_mic', conforencoding => 'PG_SJIS',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'sjis_to_mic' },
-{ oid => '4428', descr => 'conversion for MULE_INTERNAL to EUC_JP',
- conname => 'mic_to_euc_jp', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_EUC_JP', conproc => 'mic_to_euc_jp' },
-{ oid => '4429', descr => 'conversion for MULE_INTERNAL to SJIS',
- conname => 'mic_to_sjis', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_SJIS', conproc => 'mic_to_sjis' },
-{ oid => '4430', descr => 'conversion for EUC_KR to MULE_INTERNAL',
- conname => 'euc_kr_to_mic', conforencoding => 'PG_EUC_KR',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_kr_to_mic' },
-{ oid => '4431', descr => 'conversion for MULE_INTERNAL to EUC_KR',
- conname => 'mic_to_euc_kr', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_EUC_KR', conproc => 'mic_to_euc_kr' },
{ oid => '4432', descr => 'conversion for EUC_TW to BIG5',
conname => 'euc_tw_to_big5', conforencoding => 'PG_EUC_TW',
contoencoding => 'PG_BIG5', conproc => 'euc_tw_to_big5' },
{ oid => '4433', descr => 'conversion for BIG5 to EUC_TW',
conname => 'big5_to_euc_tw', conforencoding => 'PG_BIG5',
contoencoding => 'PG_EUC_TW', conproc => 'big5_to_euc_tw' },
-{ oid => '4434', descr => 'conversion for EUC_TW to MULE_INTERNAL',
- conname => 'euc_tw_to_mic', conforencoding => 'PG_EUC_TW',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_tw_to_mic' },
-{ oid => '4435', descr => 'conversion for BIG5 to MULE_INTERNAL',
- conname => 'big5_to_mic', conforencoding => 'PG_BIG5',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'big5_to_mic' },
-{ oid => '4436', descr => 'conversion for MULE_INTERNAL to EUC_TW',
- conname => 'mic_to_euc_tw', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_EUC_TW', conproc => 'mic_to_euc_tw' },
-{ oid => '4437', descr => 'conversion for MULE_INTERNAL to BIG5',
- conname => 'mic_to_big5', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_BIG5', conproc => 'mic_to_big5' },
-{ oid => '4438', descr => 'conversion for LATIN2 to MULE_INTERNAL',
- conname => 'iso_8859_2_to_mic', conforencoding => 'PG_LATIN2',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin2_to_mic' },
-{ oid => '4439', descr => 'conversion for MULE_INTERNAL to LATIN2',
- conname => 'mic_to_iso_8859_2', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_LATIN2', conproc => 'mic_to_latin2' },
-{ oid => '4440', descr => 'conversion for WIN1250 to MULE_INTERNAL',
- conname => 'windows_1250_to_mic', conforencoding => 'PG_WIN1250',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'win1250_to_mic' },
-{ oid => '4441', descr => 'conversion for MULE_INTERNAL to WIN1250',
- conname => 'mic_to_windows_1250', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_WIN1250', conproc => 'mic_to_win1250' },
{ oid => '4442', descr => 'conversion for LATIN2 to WIN1250',
conname => 'iso_8859_2_to_windows_1250', conforencoding => 'PG_LATIN2',
contoencoding => 'PG_WIN1250', conproc => 'latin2_to_win1250' },
{ oid => '4443', descr => 'conversion for WIN1250 to LATIN2',
conname => 'windows_1250_to_iso_8859_2', conforencoding => 'PG_WIN1250',
contoencoding => 'PG_LATIN2', conproc => 'win1250_to_latin2' },
-{ oid => '4444', descr => 'conversion for LATIN1 to MULE_INTERNAL',
- conname => 'iso_8859_1_to_mic', conforencoding => 'PG_LATIN1',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin1_to_mic' },
-{ oid => '4445', descr => 'conversion for MULE_INTERNAL to LATIN1',
- conname => 'mic_to_iso_8859_1', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_LATIN1', conproc => 'mic_to_latin1' },
-{ oid => '4446', descr => 'conversion for LATIN3 to MULE_INTERNAL',
- conname => 'iso_8859_3_to_mic', conforencoding => 'PG_LATIN3',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin3_to_mic' },
-{ oid => '4447', descr => 'conversion for MULE_INTERNAL to LATIN3',
- conname => 'mic_to_iso_8859_3', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_LATIN3', conproc => 'mic_to_latin3' },
-{ oid => '4448', descr => 'conversion for LATIN4 to MULE_INTERNAL',
- conname => 'iso_8859_4_to_mic', conforencoding => 'PG_LATIN4',
- contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin4_to_mic' },
-{ oid => '4449', descr => 'conversion for MULE_INTERNAL to LATIN4',
- conname => 'mic_to_iso_8859_4', conforencoding => 'PG_MULE_INTERNAL',
- contoencoding => 'PG_LATIN4', conproc => 'mic_to_latin4' },
{ oid => '4452', descr => 'conversion for BIG5 to UTF8',
conname => 'big5_to_utf8', conforencoding => 'PG_BIG5',
contoencoding => 'PG_UTF8', conproc => 'big5_to_utf8' },
proargtypes => '', prosrc => 'binary_upgrade_create_conflict_detection_slot' },
# conversion functions
-{ oid => '4302',
- descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
- proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'koi8r_to_mic', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4303',
- descr => 'internal conversion function for MULE_INTERNAL to KOI8R',
- proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4304',
- descr => 'internal conversion function for ISO-8859-5 to MULE_INTERNAL',
- proname => 'iso_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_mic',
- probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4305',
- descr => 'internal conversion function for MULE_INTERNAL to ISO-8859-5',
- proname => 'mic_to_iso', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_iso',
- probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4306',
- descr => 'internal conversion function for WIN1251 to MULE_INTERNAL',
- proname => 'win1251_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win1251_to_mic', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4307',
- descr => 'internal conversion function for MULE_INTERNAL to WIN1251',
- proname => 'mic_to_win1251', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_win1251', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4308',
- descr => 'internal conversion function for WIN866 to MULE_INTERNAL',
- proname => 'win866_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win866_to_mic', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4309',
- descr => 'internal conversion function for MULE_INTERNAL to WIN866',
- proname => 'mic_to_win866', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_win866', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4311', descr => 'internal conversion function for WIN1251 to KOI8R',
proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4312', descr => 'internal conversion function for KOI8R to WIN866',
proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'koi8r_to_win866', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'koi8r_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4313', descr => 'internal conversion function for WIN866 to KOI8R',
proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win866_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'win866_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4314',
descr => 'internal conversion function for WIN866 to WIN1251',
proname => 'win866_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4315',
descr => 'internal conversion function for WIN1251 to WIN866',
proname => 'win1251_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4316',
descr => 'internal conversion function for ISO-8859-5 to KOI8R',
proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'iso_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'iso_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4317',
descr => 'internal conversion function for KOI8R to ISO-8859-5',
proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'koi8r_to_iso', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'koi8r_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4318',
descr => 'internal conversion function for ISO-8859-5 to WIN1251',
proname => 'iso_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'iso_to_win1251', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'iso_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4319',
descr => 'internal conversion function for WIN1251 to ISO-8859-5',
proname => 'win1251_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win1251_to_iso', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'win1251_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4320',
descr => 'internal conversion function for ISO-8859-5 to WIN866',
proname => 'iso_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'iso_to_win866', probin => '$libdir/cyrillic_and_mic' },
+ prosrc => 'iso_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4321',
descr => 'internal conversion function for WIN866 to ISO-8859-5',
proname => 'win866_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win866_to_iso', probin => '$libdir/cyrillic_and_mic' },
-{ oid => '4322',
- descr => 'internal conversion function for EUC_CN to MULE_INTERNAL',
- proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'euc_cn_to_mic', probin => '$libdir/euc_cn_and_mic' },
-{ oid => '4323',
- descr => 'internal conversion function for MULE_INTERNAL to EUC_CN',
- proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_euc_cn', probin => '$libdir/euc_cn_and_mic' },
+ prosrc => 'win866_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4324', descr => 'internal conversion function for EUC_JP to SJIS',
proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'sjis_to_euc_jp', probin => '$libdir/euc_jp_and_sjis' },
-{ oid => '4326',
- descr => 'internal conversion function for EUC_JP to MULE_INTERNAL',
- proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'euc_jp_to_mic', probin => '$libdir/euc_jp_and_sjis' },
-{ oid => '4327',
- descr => 'internal conversion function for SJIS to MULE_INTERNAL',
- proname => 'sjis_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'sjis_to_mic', probin => '$libdir/euc_jp_and_sjis' },
-{ oid => '4328',
- descr => 'internal conversion function for MULE_INTERNAL to EUC_JP',
- proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_euc_jp', probin => '$libdir/euc_jp_and_sjis' },
-{ oid => '4329',
- descr => 'internal conversion function for MULE_INTERNAL to SJIS',
- proname => 'mic_to_sjis', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_sjis', probin => '$libdir/euc_jp_and_sjis' },
-{ oid => '4330',
- descr => 'internal conversion function for EUC_KR to MULE_INTERNAL',
- proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'euc_kr_to_mic', probin => '$libdir/euc_kr_and_mic' },
-{ oid => '4331',
- descr => 'internal conversion function for MULE_INTERNAL to EUC_KR',
- proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_euc_kr', probin => '$libdir/euc_kr_and_mic' },
{ oid => '4332', descr => 'internal conversion function for EUC_TW to BIG5',
proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'big5_to_euc_tw', probin => '$libdir/euc_tw_and_big5' },
-{ oid => '4334',
- descr => 'internal conversion function for EUC_TW to MULE_INTERNAL',
- proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'euc_tw_to_mic', probin => '$libdir/euc_tw_and_big5' },
-{ oid => '4335',
- descr => 'internal conversion function for BIG5 to MULE_INTERNAL',
- proname => 'big5_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'big5_to_mic', probin => '$libdir/euc_tw_and_big5' },
-{ oid => '4336',
- descr => 'internal conversion function for MULE_INTERNAL to EUC_TW',
- proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_euc_tw', probin => '$libdir/euc_tw_and_big5' },
-{ oid => '4337',
- descr => 'internal conversion function for MULE_INTERNAL to BIG5',
- proname => 'mic_to_big5', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_big5', probin => '$libdir/euc_tw_and_big5' },
-{ oid => '4338',
- descr => 'internal conversion function for LATIN2 to MULE_INTERNAL',
- proname => 'latin2_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'latin2_to_mic', probin => '$libdir/latin2_and_win1250' },
-{ oid => '4339',
- descr => 'internal conversion function for MULE_INTERNAL to LATIN2',
- proname => 'mic_to_latin2', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_latin2', probin => '$libdir/latin2_and_win1250' },
-{ oid => '4340',
- descr => 'internal conversion function for WIN1250 to MULE_INTERNAL',
- proname => 'win1250_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'win1250_to_mic', probin => '$libdir/latin2_and_win1250' },
-{ oid => '4341',
- descr => 'internal conversion function for MULE_INTERNAL to WIN1250',
- proname => 'mic_to_win1250', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_win1250', probin => '$libdir/latin2_and_win1250' },
{ oid => '4342',
descr => 'internal conversion function for LATIN2 to WIN1250',
proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'int4',
proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1250_to_latin2', probin => '$libdir/latin2_and_win1250' },
-{ oid => '4344',
- descr => 'internal conversion function for LATIN1 to MULE_INTERNAL',
- proname => 'latin1_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'latin1_to_mic', probin => '$libdir/latin_and_mic' },
-{ oid => '4345',
- descr => 'internal conversion function for MULE_INTERNAL to LATIN1',
- proname => 'mic_to_latin1', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_latin1', probin => '$libdir/latin_and_mic' },
-{ oid => '4346',
- descr => 'internal conversion function for LATIN3 to MULE_INTERNAL',
- proname => 'latin3_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'latin3_to_mic', probin => '$libdir/latin_and_mic' },
-{ oid => '4347',
- descr => 'internal conversion function for MULE_INTERNAL to LATIN3',
- proname => 'mic_to_latin3', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_latin3', probin => '$libdir/latin_and_mic' },
-{ oid => '4348',
- descr => 'internal conversion function for LATIN4 to MULE_INTERNAL',
- proname => 'latin4_to_mic', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'latin4_to_mic', probin => '$libdir/latin_and_mic' },
-{ oid => '4349',
- descr => 'internal conversion function for MULE_INTERNAL to LATIN4',
- proname => 'mic_to_latin4', prolang => 'c', prorettype => 'int4',
- proargtypes => 'int4 int4 cstring internal int4 bool',
- prosrc => 'mic_to_latin4', probin => '$libdir/latin_and_mic' },
{ oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
proname => 'big5_to_utf8', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
#define SS3 0x8f /* single shift 3 (JIS0212) */
/*
- * SJIS validation macros
+ * EUC_TW planes
*/
-#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
-#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
-
-/*----------------------------------------------------
- * MULE Internal Encoding (MIC)
- *
- * This encoding follows the design used within XEmacs; it is meant to
- * subsume many externally-defined character sets. Each character includes
- * identification of the character set it belongs to, so the encoding is
- * general but somewhat bulky.
- *
- * Currently PostgreSQL supports 5 types of MULE character sets:
- *
- * 1) 1-byte ASCII characters. Each byte is below 0x80.
- *
- * 2) "Official" single byte charsets such as ISO-8859-1 (Latin1).
- * Each MULE character consists of 2 bytes: LC1 + C1, where LC1 is
- * an identifier for the charset (in the range 0x81 to 0x8d) and C1
- * is the character code (in the range 0xa0 to 0xff).
- *
- * 3) "Private" single byte charsets such as SISHENG. Each MULE
- * character consists of 3 bytes: LCPRV1 + LC12 + C1, where LCPRV1
- * is a private-charset flag, LC12 is an identifier for the charset,
- * and C1 is the character code (in the range 0xa0 to 0xff).
- * LCPRV1 is either 0x9a (if LC12 is in the range 0xa0 to 0xdf)
- * or 0x9b (if LC12 is in the range 0xe0 to 0xef).
- *
- * 4) "Official" multibyte charsets such as JIS X0208. Each MULE
- * character consists of 3 bytes: LC2 + C1 + C2, where LC2 is
- * an identifier for the charset (in the range 0x90 to 0x99) and C1
- * and C2 form the character code (each in the range 0xa0 to 0xff).
- *
- * 5) "Private" multibyte charsets such as CNS 11643-1992 Plane 3.
- * Each MULE character consists of 4 bytes: LCPRV2 + LC22 + C1 + C2,
- * where LCPRV2 is a private-charset flag, LC22 is an identifier for
- * the charset, and C1 and C2 form the character code (each in the range
- * 0xa0 to 0xff). LCPRV2 is either 0x9c (if LC22 is in the range 0xf0
- * to 0xf4) or 0x9d (if LC22 is in the range 0xf5 to 0xfe).
- *
- * "Official" encodings are those that have been assigned code numbers by
- * the XEmacs project; "private" encodings have Postgres-specific charset
- * identifiers.
- *
- * See the "XEmacs Internals Manual", available at http://www.xemacs.org,
- * for more details. Note that for historical reasons, Postgres'
- * private-charset flag values do not match what XEmacs says they should be,
- * so this isn't really exactly MULE (not that private charsets would be
- * interoperable anyway).
- *
- * Note that XEmacs's implementation is different from what emacs does.
- * We follow emacs's implementation, rather than XEmacs's.
- *----------------------------------------------------
- */
-
-/*
- * Charset identifiers (also called "leading bytes" in the MULE documentation)
- */
-
-/*
- * Charset IDs for official single byte encodings (0x81-0x8e)
- */
-#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */
-#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */
-#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */
-#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */
-#define LC_TIS620 0x85 /* Thai (not supported yet) */
-#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */
-#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */
-#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */
-#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
-#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */
-/* Note that 0x8b seems to be unused as of Emacs 20.7.
- * However, there might be a chance that 0x8b could be used
- * in later versions of Emacs.
- */
-#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */
-#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */
-#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */
-#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */
-/* #define CONTROL_1 0x8f control characters (unused) */
-
-/* Is a leading byte for "official" single byte encodings? */
-#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
-
-/*
- * Charset IDs for official multibyte encodings (0x90-0x99)
- * 0x9a-0x9d are free. 0x9e and 0x9f are reserved.
- */
-#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */
-#define LC_GB2312_80 0x91 /* Chinese */
-#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */
-#define LC_KS5601 0x93 /* Korean */
-#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */
-#define LC_JISX0213_1 0x97 /* Japanese Kanji (JIS X 0213 Plane 1)
- * (not supported) */
-#define LC_BIG5_1 0x98 /* Plane 1 Chinese traditional (not
- * supported) */
-#define LC_BIG5_2 0x99 /* Plane 1 Chinese traditional (not
- * supported) */
-
-/* Is a leading byte for "official" multibyte encodings? */
-#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
-
-/*
- * Postgres-specific prefix bytes for "private" single byte encodings
- * (According to the MULE docs, we should be using 0x9e for this)
- */
-#define LCPRV1_A 0x9a
-#define LCPRV1_B 0x9b
-#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
-#define IS_LCPRV1_A_RANGE(c) \
- ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
-#define IS_LCPRV1_B_RANGE(c) \
- ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
-
-/*
- * Postgres-specific prefix bytes for "private" multibyte encodings
- * (According to the MULE docs, we should be using 0x9f for this)
- */
-#define LCPRV2_A 0x9c
-#define LCPRV2_B 0x9d
-#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
-#define IS_LCPRV2_A_RANGE(c) \
- ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
-#define IS_LCPRV2_B_RANGE(c) \
- ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
-
-/*
- * Charset IDs for private single byte encodings (0xa0-0xef)
- */
-#define LC_SISHENG 0xa0 /* Chinese SiSheng characters for
- * PinYin/ZhuYin (not supported) */
-#define LC_IPA 0xa1 /* IPA (International Phonetic
- * Association) (not supported) */
-#define LC_VISCII_LOWER 0xa2 /* Vietnamese VISCII1.1 lower-case (not
- * supported) */
-#define LC_VISCII_UPPER 0xa3 /* Vietnamese VISCII1.1 upper-case (not
- * supported) */
-#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */
-#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */
-#define LC_ASCII_RIGHT_TO_LEFT 0xa6 /* ASCII (left half of ISO8859-1) with
- * right-to-left direction (not
- * supported) */
-#define LC_LAO 0xa7 /* Lao characters (ISO10646 0E80..0EDF)
- * (not supported) */
-#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 1-column (not supported) */
-
-/*
- * Charset IDs for private multibyte encodings (0xf0-0xff)
- */
-#define LC_INDIAN_1_COLUMN 0xf0 /* Indian charset for 1-column width
- * glyphs (not supported) */
-#define LC_TIBETAN_1_COLUMN 0xf1 /* Tibetan 1-column width glyphs (not
- * supported) */
-#define LC_UNICODE_SUBSET_2 0xf2 /* Unicode characters of the range
- * U+2500..U+33FF. (not supported) */
-#define LC_UNICODE_SUBSET_3 0xf3 /* Unicode characters of the range
- * U+E000..U+FFFF. (not supported) */
-#define LC_UNICODE_SUBSET 0xf4 /* Unicode characters of the range
- * U+0100..U+24FF. (not supported) */
-#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */
#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */
-#define LC_INDIAN_2_COLUMN 0xfb /* Indian charset for 2-column width
- * glyphs (not supported) */
-#define LC_TIBETAN 0xfc /* Tibetan (not supported) */
-/* #define FREE 0xfd free (unused) */
-/* #define FREE 0xfe free (unused) */
-/* #define FREE 0xff free (unused) */
-
-/*----------------------------------------------------
- * end of MULE stuff
- *----------------------------------------------------
+
+/*
+ * SJIS validation macros
*/
+#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
+#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/*
* PostgreSQL encoding identifiers
PG_EUC_TW, /* EUC for Taiwan */
PG_EUC_JIS_2004, /* EUC-JIS-2004 */
PG_UTF8, /* Unicode UTF8 */
- PG_MULE_INTERNAL, /* Mule internal code */
+ PG_UNUSED_1, /* unused placeholder slot (was PG_MULE_INTERNAL, Mule internal code) */
PG_LATIN1, /* ISO-8859-1 Latin 1 */
PG_LATIN2, /* ISO-8859-2 Latin 2 */
PG_LATIN3, /* ISO-8859-3 Latin 3 */
#define PG_ENCODING_BE_LAST PG_KOI8U
+#define PG_UNUSED_ENCODING(_enc) \
+ ((_enc) == PG_UNUSED_1)
+
/*
* Please use these tests before access to pg_enc2name_tbl[]
* or to other places...
*/
#define PG_VALID_BE_ENCODING(_enc) \
- ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
+ ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST && !PG_UNUSED_ENCODING(_enc))
#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
#define PG_VALID_ENCODING(_enc) \
- ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
+ ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_ && !PG_UNUSED_ENCODING(_enc))
/* On FE are possible all encodings */
#define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
extern bool pg_utf8_islegal(const unsigned char *source, int length);
extern int pg_utf_mblen(const unsigned char *s);
-extern int pg_mule_mblen(const unsigned char *s);
/*
* The remaining functions are backend-only.
extern int local2local(const unsigned char *l, unsigned char *p, int len,
int src_encoding, int dest_encoding,
const unsigned char *tab, bool noError);
-extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding, bool noError);
-extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding, bool noError);
-extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
- int len, int lc, int encoding,
- const unsigned char *tab, bool noError);
-extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
- int len, int lc, int encoding,
- const unsigned char *tab, bool noError);
#ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
*
* PyUnicode_AsEncodedString could be used to encode the object directly
* in the server encoding, but Python doesn't support all the encodings
- * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
- * intermediary in PLyUnicode_FromString as well.
+ * that PostgreSQL does (EUC_TW). UTF-8 is used as an intermediary in
+ * PLyUnicode_FromString as well.
*/
if (GetDatabaseEncoding() != PG_UTF8)
{
+++ /dev/null
-drop table \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-ERROR: table "\92·×\92»»\92µ¡\92ÍÑ\92¸ì" does not exist
-create table \92·×\92»»\92µ¡\92ÍÑ\92¸ì (\92ÍÑ\92¸ì text, \92ʬ\92Îà\92¥³\92¡¼\92¥É varchar, \92È÷\92¹Í1A\92¤À\92¤è char(16));
-create index \92·×\92»»\92µ¡\92ÍÑ\92¸ìindex1 on \92·×\92»»\92µ¡\92ÍÑ\92¸ì using btree (\92ÍÑ\92¸ì);
-create index \92·×\92»»\92µ¡\92ÍÑ\92¸ìindex2 on \92·×\92»»\92µ¡\92ÍÑ\92¸ì using hash (\92ʬ\92Îà\92¥³\92¡¼\92¥É);
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤','\92µ¡A01\92¾å');
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹','\92ʬB10\92Ãæ');
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼','\92¿ÍZ01\92²¼');
-vacuum \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
-----------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å |
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ |
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ |
-(3 rows)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É = '\92¿ÍZ01\92²¼';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
---------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ |
-(1 row)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É ~* '\92¿Íz01\92²¼';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
---------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ |
-(1 row)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É like '_Z01_';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
---------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ |
-(1 row)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É like '_Z%';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
---------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ |
-(1 row)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ÍÑ\92¸ì ~ '\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿[\92¥Ç\92¥°]';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
-----------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å |
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ |
-(2 rows)
-
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ÍÑ\92¸ì ~* '\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿[\92¥Ç\92¥°]';
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è
-----------------------------+------------+------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å |
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ |
-(2 rows)
-
-select *,character_length(\92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è | character_length
-----------------------------+------------+------------+------------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å | | 12
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ | | 13
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ | | 12
-(3 rows)
-
-select *,octet_length(\92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è | octet_length
-----------------------------+------------+------------+--------------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å | | 36
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ | | 39
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ | | 36
-(3 rows)
-
-select *,position('\92¥Ç' in \92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è | position
-----------------------------+------------+------------+----------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å | | 7
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ | | 0
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ | | 0
-(3 rows)
-
-select *,substring(\92ÍÑ\92¸ì from 10 for 4) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
- \92ÍÑ\92¸ì | \92ʬ\92Îà\92¥³\92¡¼\92¥É | \92È÷\92¹Í1a\92¤À\92¤è | substring
-----------------------------+------------+------------+-----------
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤ | \92µ¡A01\92¾å | | \92¥×\92¥ì\92¥¤
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹ | \92ʬB10\92Ãæ | | \92¥£\92¥Ã\92¥¯\92¥¹
- \92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼ | \92¿ÍZ01\92²¼ | | \92¥é\92¥Þ\92¡¼
-(3 rows)
-
-drop table \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-ERROR: table "\91¼Æ\91Ëã\91»ú\91Êõ\91Óï" does not exist
-create table \91¼Æ\91Ëã\91»ú\91Êõ\91Óï(\91Êõ\91Óï text, \91·Ö\91Àà\91ºÅ varchar, \91±¸\91×¢1A char(16));
-create index \91¼Æ\91Ëã\91»ú\91Êõ\91Óïindex1 on \91¼Æ\91Ëã\91»ú\91Êõ\91Óï using btree(\91Êõ\91Óï);
-create index \91¼Æ\91Ëã\91»ú\91Êõ\91Óïindex2 on \91¼Æ\91Ëã\91»ú\91Êõ\91Óï using btree(\91·Ö\91Àà\91ºÅ);
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ','\91»úA01\91ÉÏ');
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91ͼ\91ÐÎ','\91·ÖB01\91ÖÐ');
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91³Ì\91Ðò\91Ô±','\91ÈËZ01\91ÏÂ');
-vacuum \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ |
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ |
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï |
-(3 rows)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ = '\91ÈËZ01\91ÏÂ';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï |
-(1 row)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ ~* '\91ÈËz01\91ÏÂ';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï |
-(1 row)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ like '_Z01_';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï |
-(1 row)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ like '_Z%';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï |
-(1 row)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91Êõ\91Óï ~ '\91µç\91ÄÔ[\91ÏÔ\91ͼ]';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ |
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ |
-(2 rows)
-
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91Êõ\91Óï ~* '\91µç\91ÄÔ[\91ÏÔ\91ͼ]';
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a
-------------+---------+--------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ |
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ |
-(2 rows)
-
-select *,character_length(\91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a | character_length
-------------+---------+--------+------------------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ | | 5
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ | | 4
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï | | 5
-(3 rows)
-
-select *,octet_length(\91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a | octet_length
-------------+---------+--------+--------------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ | | 15
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ | | 12
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï | | 15
-(3 rows)
-
-select *,position('\91ÏÔ' in \91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a | position
-------------+---------+--------+----------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ | | 3
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ | | 0
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï | | 0
-(3 rows)
-
-select *,substring(\91Êõ\91Óï from 3 for 4) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
- \91Êõ\91Óï | \91·Ö\91Àà\91ºÅ | \91±¸\91×¢1a | substring
-------------+---------+--------+-----------
- \91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ | \91»úA01\91ÉÏ | | \91ÏÔ\91ʾ\91ÆÁ
- \91µç\91ÄÔ\91ͼ\91ÐÎ | \91·ÖB01\91ÖÐ | | \91ͼ\91ÐÎ
- \91µç\91ÄÔ\91³Ì\91Ðò\91Ô± | \91ÈËZ01\91Ï | | \91³Ì\91Ðò\91Ô±
-(3 rows)
-
-drop table \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-ERROR: table "\93ͪ\93ß©\93Ѧ\93¿ë\93¾î" does not exist
-create table \93ͪ\93ß©\93Ѧ\93¿ë\93¾î (\93¿ë\93¾î text, \93ÝÂ\93×¾\93ÄÚ\93µå varchar, \93ºñ\93°í1A\93¶ó\93±¸ char(16));
-create index \93ͪ\93ß©\93Ѧ\93¿ë\93¾îindex1 on \93ͪ\93ß©\93Ѧ\93¿ë\93¾î using btree (\93¿ë\93¾î);
-create index \93ͪ\93ß©\93Ѧ\93¿ë\93¾îindex2 on \93ͪ\93ß©\93Ѧ\93¿ë\93¾î using hash (\93ÝÂ\93×¾\93ÄÚ\93µå);
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ', '\93ѦA01\93ß¾');
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º', '\93ÝÂB10\93ñé');
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó', '\93ìÑZ01\93ù»');
-vacuum \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ |
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé |
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» |
-(3 rows)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå = '\93ìÑZ01\93ù»';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» |
-(1 row)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå ~* '\93ìÑz01\93ù»';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» |
-(1 row)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå like '_Z01_';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» |
-(1 row)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå like '_Z%';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» |
-(1 row)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93¿ë\93¾î ~ '\93ÄÄ\93Ç»\93ÅÍ[\93µð\93±×]';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ |
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé |
-(2 rows)
-
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93¿ë\93¾î ~* '\93ÄÄ\93Ç»\93ÅÍ[\93µð\93±×]';
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸
-------------------+----------+------------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ |
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé |
-(2 rows)
-
-select *,character_length(\93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸ | character_length
-------------------+----------+------------+------------------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ | | 8
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé | | 7
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» | | 8
-(3 rows)
-
-select *,octet_length(\93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸ | octet_length
-------------------+----------+------------+--------------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ | | 24
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé | | 21
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» | | 24
-(3 rows)
-
-select *,position('\93µð' in \93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸ | position
-------------------+----------+------------+----------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ | | 4
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé | | 0
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» | | 0
-(3 rows)
-
-select *,substring(\93¿ë\93¾î from 3 for 4) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
- \93¿ë\93¾î | \93ÝÂ\93×¾\93ÄÚ\93µå | \93ºñ\93°í1a\93¶ó\93±¸ | substring
-------------------+----------+------------+-----------
- \93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ | \93ѦA01\93ß¾ | | \93ÅÍ\93µð\93½º\93ÇÃ
- \93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º | \93ÝÂB10\93ñé | | \93ÅÍ\93±×\93·¡\93ÇÈ
- \93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó | \93ìÑZ01\93ù» | | \93ÅÍ\93ÇÁ\93·Î\93±×
-(3 rows)
-
-drop table test;
-ERROR: table "test" does not exist
-create table test (t text);
-insert into test values('ENGLISH');
-insert into test values('FRAN\81ÇAIS');
-insert into test values('ESPA\81ÑOL');
-insert into test values('\81ÍSLENSKA');
-insert into test values('ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA');
-vacuum test;
-select * from test;
- t
------------------------------------
- ENGLISH
- FRAN\81ÇAIS
- ESPA\81ÑOL
- \81ÍSLENSKA
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA
-(5 rows)
-
-select * from test where t = 'ESPA\81ÑOL';
- t
----------
- ESPA\81ÑOL
-(1 row)
-
-select * from test where t ~* 'espa\81Ñol';
- t
------------------------------------
- ESPA\81ÑOL
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA
-(2 rows)
-
-select *,character_length(t) from test;
- t | character_length
------------------------------------+------------------
- ENGLISH | 7
- FRAN\81ÇAIS | 8
- ESPA\81ÑOL | 7
- \81ÍSLENSKA | 8
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA | 33
-(5 rows)
-
-select *,octet_length(t) from test;
- t | octet_length
------------------------------------+--------------
- ENGLISH | 7
- FRAN\81ÇAIS | 9
- ESPA\81ÑOL | 8
- \81ÍSLENSKA | 9
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA | 36
-(5 rows)
-
-select *,position('L' in t) from test;
- t | position
------------------------------------+----------
- ENGLISH | 4
- FRAN\81ÇAIS | 0
- ESPA\81ÑOL | 7
- \81ÍSLENSKA | 3
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA | 4
-(5 rows)
-
-select *,substring(t from 3 for 4) from test;
- t | substring
------------------------------------+-----------
- ENGLISH | GLIS
- FRAN\81ÇAIS | AN\81ÇA
- ESPA\81ÑOL | PA\81ÑO
- \81ÍSLENSKA | LENS
- ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA | GLIS
-(5 rows)
-
# in the test list, client-only encodings must follow the server encoding
# they're to be tested with; see hard-coded cases below
-tests="euc_jp sjis euc_kr euc_cn euc_tw big5 utf8 gb18030 mule_internal"
+tests="euc_jp sjis euc_kr euc_cn euc_tw big5 utf8 gb18030"
EXITCODE=0
+++ /dev/null
-drop table \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-create table \92·×\92»»\92µ¡\92ÍÑ\92¸ì (\92ÍÑ\92¸ì text, \92ʬ\92Îà\92¥³\92¡¼\92¥É varchar, \92È÷\92¹Í1A\92¤À\92¤è char(16));
-create index \92·×\92»»\92µ¡\92ÍÑ\92¸ìindex1 on \92·×\92»»\92µ¡\92ÍÑ\92¸ì using btree (\92ÍÑ\92¸ì);
-create index \92·×\92»»\92µ¡\92ÍÑ\92¸ìindex2 on \92·×\92»»\92µ¡\92ÍÑ\92¸ì using hash (\92ʬ\92Îà\92¥³\92¡¼\92¥É);
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥Ç\92¥£\92¥¹\92¥×\92¥ì\92¥¤','\92µ¡A01\92¾å');
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥°\92¥é\92¥Õ\92¥£\92¥Ã\92¥¯\92¥¹','\92ʬB10\92Ãæ');
-insert into \92·×\92»»\92µ¡\92ÍÑ\92¸ì values('\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿\92¥×\92¥í\92¥°\92¥é\92¥Þ\92¡¼','\92¿ÍZ01\92²¼');
-vacuum \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É = '\92¿ÍZ01\92²¼';
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É ~* '\92¿Íz01\92²¼';
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É like '_Z01_';
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ʬ\92Îà\92¥³\92¡¼\92¥É like '_Z%';
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ÍÑ\92¸ì ~ '\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿[\92¥Ç\92¥°]';
-select * from \92·×\92»»\92µ¡\92ÍÑ\92¸ì where \92ÍÑ\92¸ì ~* '\92¥³\92¥ó\92¥Ô\92¥å\92¡¼\92¥¿[\92¥Ç\92¥°]';
-select *,character_length(\92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select *,octet_length(\92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select *,position('\92¥Ç' in \92ÍÑ\92¸ì) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-select *,substring(\92ÍÑ\92¸ì from 10 for 4) from \92·×\92»»\92µ¡\92ÍÑ\92¸ì;
-drop table \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-create table \91¼Æ\91Ëã\91»ú\91Êõ\91Óï(\91Êõ\91Óï text, \91·Ö\91Àà\91ºÅ varchar, \91±¸\91×¢1A char(16));
-create index \91¼Æ\91Ëã\91»ú\91Êõ\91Óïindex1 on \91¼Æ\91Ëã\91»ú\91Êõ\91Óï using btree(\91Êõ\91Óï);
-create index \91¼Æ\91Ëã\91»ú\91Êõ\91Óïindex2 on \91¼Æ\91Ëã\91»ú\91Êõ\91Óï using btree(\91·Ö\91Àà\91ºÅ);
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91ÏÔ\91ʾ\91ÆÁ','\91»úA01\91ÉÏ');
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91ͼ\91ÐÎ','\91·ÖB01\91ÖÐ');
-insert into \91¼Æ\91Ëã\91»ú\91Êõ\91Óï values('\91µç\91ÄÔ\91³Ì\91Ðò\91Ô±','\91ÈËZ01\91ÏÂ');
-vacuum \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ = '\91ÈËZ01\91ÏÂ';
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ ~* '\91ÈËz01\91ÏÂ';
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ like '_Z01_';
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91·Ö\91Àà\91ºÅ like '_Z%';
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91Êõ\91Óï ~ '\91µç\91ÄÔ[\91ÏÔ\91ͼ]';
-select * from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï where \91Êõ\91Óï ~* '\91µç\91ÄÔ[\91ÏÔ\91ͼ]';
-select *,character_length(\91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select *,octet_length(\91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select *,position('\91ÏÔ' in \91Êõ\91Óï) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-select *,substring(\91Êõ\91Óï from 3 for 4) from \91¼Æ\91Ëã\91»ú\91Êõ\91Óï;
-drop table \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-create table \93ͪ\93ß©\93Ѧ\93¿ë\93¾î (\93¿ë\93¾î text, \93ÝÂ\93×¾\93ÄÚ\93µå varchar, \93ºñ\93°í1A\93¶ó\93±¸ char(16));
-create index \93ͪ\93ß©\93Ѧ\93¿ë\93¾îindex1 on \93ͪ\93ß©\93Ѧ\93¿ë\93¾î using btree (\93¿ë\93¾î);
-create index \93ͪ\93ß©\93Ѧ\93¿ë\93¾îindex2 on \93ͪ\93ß©\93Ѧ\93¿ë\93¾î using hash (\93ÝÂ\93×¾\93ÄÚ\93µå);
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93µð\93½º\93ÇÃ\93·¹\93ÀÌ', '\93ѦA01\93ß¾');
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93±×\93·¡\93ÇÈ\93½º', '\93ÝÂB10\93ñé');
-insert into \93ͪ\93ß©\93Ѧ\93¿ë\93¾î values('\93ÄÄ\93Ç»\93ÅÍ\93ÇÁ\93·Î\93±×\93·¡\93¸Ó', '\93ìÑZ01\93ù»');
-vacuum \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå = '\93ìÑZ01\93ù»';
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå ~* '\93ìÑz01\93ù»';
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå like '_Z01_';
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93ÝÂ\93×¾\93ÄÚ\93µå like '_Z%';
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93¿ë\93¾î ~ '\93ÄÄ\93Ç»\93ÅÍ[\93µð\93±×]';
-select * from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î where \93¿ë\93¾î ~* '\93ÄÄ\93Ç»\93ÅÍ[\93µð\93±×]';
-select *,character_length(\93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select *,octet_length(\93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select *,position('\93µð' in \93¿ë\93¾î) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-select *,substring(\93¿ë\93¾î from 3 for 4) from \93ͪ\93ß©\93Ѧ\93¿ë\93¾î;
-drop table test;
-create table test (t text);
-insert into test values('ENGLISH');
-insert into test values('FRAN\81ÇAIS');
-insert into test values('ESPA\81ÑOL');
-insert into test values('\81ÍSLENSKA');
-insert into test values('ENGLISH FRAN\81ÇAIS ESPA\81ÑOL \81ÍSLENSKA');
-vacuum test;
-select * from test;
-select * from test where t = 'ESPA\81ÑOL';
-select * from test where t ~* 'espa\81Ñol';
-select *,character_length(t) from test;
-select *,octet_length(t) from test;
-select *,position('L' in t) from test;
-select *,substring(t from 3 for 4) from test;
TV("gbk", "\x80\""),
TV("gbk", "\x80\\"),
- TV("mule_internal", "\\\x9c';\0;"),
-
TV("sql_ascii", "1\xC0'"),
/*
invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
(5 rows)
-select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
- description | inbytes | result | errorat | error
--------------------+------------+----------------+----------+-------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid | \xe4dede | \x8bc68bcf8bcf | |
- invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
- invalid, NUL byte | \xe400dede | \x8bc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
- invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
-(5 rows)
-
--
-- Big5
--
invalid, NUL byte | \x666f6fb64800 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
(5 rows)
-select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
- description | inbytes | result | errorat | error
---------------------------------+----------------+----------------+----------+------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid | \x666f6fb648 | \x666f6f95e2af | |
- valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | |
- invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
- invalid, NUL byte | \x666f6fb64800 | \x666f6f95e2af | \x00 | invalid byte sequence for encoding "BIG5": 0x00
-(5 rows)
-
---
--- MULE_INTERNAL
---
-CREATE TABLE mic_inputs (inbytes bytea, description text);
-insert into mic_inputs values
- ('\x666f6f', 'valid, pure ASCII'),
- ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
- ('\x8bc68bcf8b', 'invalid,incomplete char'),
- ('\x92bedd', 'valid (in SHIFT_JIS)'),
- ('\x92be', 'invalid, incomplete char)'),
- ('\x666f6f95a3c1', 'valid (in Big5)'),
- ('\x666f6f95a3', 'invalid, incomplete char'),
- ('\x9200bedd', 'invalid, NUL byte'),
- ('\x92bedd00', 'invalid, NUL byte'),
- ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
--- Test MULE_INTERNAL verification
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+----------------+------------------+--------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \x8bc68bcf8bcf | |
- invalid,incomplete char | \x8bc68bcf8b | \x8bc68bcf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
- valid (in SHIFT_JIS) | \x92bedd | \x92bedd | |
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6f95a3c1 | |
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
- invalid, NUL byte | \x92bedd00 | \x92bedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
-(10 rows)
-
--- Test conversions from MULE_INTERNAL
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | |
- invalid,incomplete char | \x8bc68bcf8b | \xc6cf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
- valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
- invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
-(10 rows)
-
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | |
- invalid,incomplete char | \x8bc68bcf8b | \xe4de | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
- valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
- invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
-(10 rows)
-
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
- invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
- valid (in SHIFT_JIS) | \x92bedd | \x8fdb | |
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
- invalid, NUL byte | \x92bedd00 | \x8fdb | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
-(10 rows)
-
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+--------------+------------------+--------------------------------------------------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
- invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
- valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | |
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
- invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
-(10 rows)
-
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
- description | inbytes | result | errorat | error
----------------------------+------------------+----------+------------------+----------------------------------------------------------------------------------------------------------------
- valid, pure ASCII | \x666f6f | \x666f6f | |
- valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
- invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
- valid (in SHIFT_JIS) | \x92bedd | \xbedd | |
- invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
- valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
- invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
- invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
- invalid, NUL byte | \x92bedd00 | \xbedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
- invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
-(10 rows)
-
('UTF8', '6 byte, unsupported', '\xfd8283'),
('UTF8', '6 byte, unsupported', '\xfd828384'),
('UTF8', '6 byte, unsupported', '\xfd82838485'),
- ('UTF8', '6 byte, unsupported', '\xfd8283848586'),
- -- MULE_INTERNAL
- -- 2 81..8d LC1
- -- 3 90..99 LC2
- ('MULE_INTERNAL', 'ASCII', 'a'),
- ('MULE_INTERNAL', 'LC1, short', '\x81'),
- ('MULE_INTERNAL', 'LC1', '\x8182'),
- ('MULE_INTERNAL', 'LC2, short', '\x90'),
- ('MULE_INTERNAL', 'LC2, short', '\x9082'),
- ('MULE_INTERNAL', 'LC2', '\x908283');
+ ('UTF8', '6 byte, unsupported', '\xfd8283848586');
SELECT COUNT(test_encoding(encoding, description, input)) > 0
FROM encoding_tests;
NOTICE: LATIN1 ASCII: \x61 -> {97} -> \x61 = OK
NOTICE: UTF8 6 byte, unsupported: \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed
NOTICE: UTF8 6 byte, unsupported: \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed
NOTICE: UTF8 6 byte, unsupported: \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed
-NOTICE: MULE_INTERNAL ASCII: \x61 -> {97} -> \x61 = OK
-NOTICE: MULE_INTERNAL LC1, short: \x81 -> {} -> \x = truncated
-NOTICE: MULE_INTERNAL LC1: \x8182 -> {8454274} -> \x8182 = OK
-NOTICE: MULE_INTERNAL LC2, short: \x90 -> {} -> \x = truncated
-NOTICE: MULE_INTERNAL LC2, short: \x9082 -> {} -> \x = truncated
-NOTICE: MULE_INTERNAL LC2: \x908283 -> {9470595} -> \x908283 = OK
?column?
----------
t
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
- ('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
+ ('conversion', '{pg_catalog, koi8_r_to_utf8}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object
cast|NULL|NULL|(bigint AS integer)|t
table constraint|addr_nsp|NULL|a_chk on addr_nsp.gentable|t
domain constraint|addr_nsp|NULL|domconstr on addr_nsp.gendomain|t
-conversion|pg_catalog|koi8_r_to_mic|pg_catalog.koi8_r_to_mic|t
+conversion|pg_catalog|koi8_r_to_utf8|pg_catalog.koi8_r_to_utf8|t
language|NULL|plpgsql|plpgsql|t
schema|NULL|addr_nsp|addr_nsp|t
operator class|pg_catalog|int4_ops|pg_catalog.int4_ops USING btree|t
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
- -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
- OR undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
- -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
- OR undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
- -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
- OR undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;
mblen,
valid;
+ if (!PG_VALID_ENCODING(i))
+ continue;
if (pg_encoding_max_length(i) == 1)
continue;
pg_encoding_set_invalid(i, buf);
-- Test conversions from ISO-8859-5
select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
-select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
--
-- Big5
select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
-- Test conversions from Big5
select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
-select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
-
---
--- MULE_INTERNAL
---
-CREATE TABLE mic_inputs (inbytes bytea, description text);
-insert into mic_inputs values
- ('\x666f6f', 'valid, pure ASCII'),
- ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
- ('\x8bc68bcf8b', 'invalid,incomplete char'),
- ('\x92bedd', 'valid (in SHIFT_JIS)'),
- ('\x92be', 'invalid, incomplete char)'),
- ('\x666f6f95a3c1', 'valid (in Big5)'),
- ('\x666f6f95a3', 'invalid, incomplete char'),
- ('\x9200bedd', 'invalid, NUL byte'),
- ('\x92bedd00', 'invalid, NUL byte'),
- ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
-
--- Test MULE_INTERNAL verification
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
--- Test conversions from MULE_INTERNAL
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
-select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
('UTF8', '6 byte, unsupported', '\xfd8283'),
('UTF8', '6 byte, unsupported', '\xfd828384'),
('UTF8', '6 byte, unsupported', '\xfd82838485'),
- ('UTF8', '6 byte, unsupported', '\xfd8283848586'),
- -- MULE_INTERNAL
- -- 2 81..8d LC1
- -- 3 90..99 LC2
- ('MULE_INTERNAL', 'ASCII', 'a'),
- ('MULE_INTERNAL', 'LC1, short', '\x81'),
- ('MULE_INTERNAL', 'LC1', '\x8182'),
- ('MULE_INTERNAL', 'LC2, short', '\x90'),
- ('MULE_INTERNAL', 'LC2, short', '\x9082'),
- ('MULE_INTERNAL', 'LC2', '\x908283');
+ ('UTF8', '6 byte, unsupported', '\xfd8283848586');
SELECT COUNT(test_encoding(encoding, description, input)) > 0
FROM encoding_tests;
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
- ('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
+ ('conversion', '{pg_catalog, koi8_r_to_utf8}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
- -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
- OR undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;