From: Teodor Sigaev Date: Mon, 2 Mar 2009 15:13:17 +0000 (+0000) Subject: Fix usage of char2wchar/wchar2char. Changes: X-Git-Tag: REL8_2_13~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8fcdac99876df0bd04b8dd48fc37d8a286231033;p=thirdparty%2Fpostgresql.git Fix usage of char2wchar/wchar2char. Changes: - pg_wchar and wchar_t can have different sizes, so char2wchar no longer calls pg_mb2wchar_with_len, to prevent an out-of-bounds memory access - make char2wchar/wchar2char symmetric; now they should not be called with C-locale because mbstowcs/wcstombs often do not work correctly with C-locale. - Text parser uses pg_mb2wchar_with_len directly in case of C-locale and multibyte encoding Per bug report by Hiroshi Inoue and following discussion. Backpatch to 8.2, where multibyte support was implemented in tsearch. --- diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c index cb022d7e2a4..46e6a1ac007 100644 --- a/contrib/tsearch2/ts_locale.c +++ b/contrib/tsearch2/ts_locale.c @@ -64,15 +64,8 @@ char2wchar(wchar_t *to, const char *from, size_t len) } else #endif /* WIN32 */ - if ( lc_ctype_is_c() ) - { - /* - * pg_mb2wchar_with_len always adds trailing '\0', so - * 'to' should be allocated with sufficient space - */ - return pg_mb2wchar_with_len(from, (pg_wchar *)to, len); - } + Assert( !lc_ctype_is_c() ); return mbstowcs(to, from, len); } diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c index af58f59a994..19b2cfd228e 100644 --- a/contrib/tsearch2/wordparser/parser.c +++ b/contrib/tsearch2/wordparser/parser.c @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.2 2007/03/22 15:59:09 teodor Exp $ */ +/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.3 2009/03/02 15:13:17 teodor Exp $ */ #include "postgres.h" @@ -46,12 +46,24 @@ TParserInit(char *str, int len) if (prs->charmaxlen > 1) { prs->usewide = true; - prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * 
(prs->lenstr+1)); - prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); + if ( lc_ctype_is_c() ) + { + /* + * char2wchar doesn't work for C-locale and + * sizeof(pg_wchar) could be not equal to sizeof(wchar_t) + */ + prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); + pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); + } + else + { + prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1)); + prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); + } } else -#endif prs->usewide = false; +#endif prs->state = newTParserPosition(NULL); prs->state->state = TPS_Base; @@ -73,17 +85,21 @@ TParserClose(TParser * prs) #ifdef TS_USE_WIDE if (prs->wstr) pfree(prs->wstr); + if (prs->pgwstr) + pfree(prs->pgwstr); #endif pfree(prs); } /* - * defining support function, equvalent is* macroses, but - * working with any possible encodings and locales. Note, - * that with multibyte encoding and C-locale isw* function may fail - * or give wrong result. Note 2: multibyte encoding and C-locale - * often are used for Asian languages. + * Character-type support functions, equivalent to is* macros, but + * working with any possible encodings and locales. Notes: + * - with multibyte encoding and C-locale isw* function may fail + * or give wrong result. + * - multibyte encoding and C-locale often are used for + * Asian languages. 
+ * - if locale is C the we use pgwstr instead of wstr */ #ifdef TS_USE_WIDE @@ -94,8 +110,8 @@ p_is##type(TParser *prs) { \ Assert( prs->state ); \ if ( prs->usewide ) \ { \ - if ( lc_ctype_is_c() ) \ - return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \ + if ( prs->pgwstr ) \ + return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\ \ return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \ } \ @@ -115,9 +131,9 @@ p_isalnum(TParser *prs) if (prs->usewide) { - if (lc_ctype_is_c()) + if (prs->pgwstr) { - unsigned int c = *(prs->wstr + prs->state->poschar); + unsigned int c = *(prs->pgwstr + prs->state->poschar); /* * any non-ascii symbol with multibyte encoding @@ -148,9 +164,9 @@ p_isalpha(TParser *prs) if (prs->usewide) { - if (lc_ctype_is_c()) + if (prs->pgwstr) { - unsigned int c = *(prs->wstr + prs->state->poschar); + unsigned int c = *(prs->pgwstr + prs->state->poschar); /* * any non-ascii symbol with multibyte encoding diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h index c40717a80f8..67c30904952 100644 --- a/contrib/tsearch2/wordparser/parser.h +++ b/contrib/tsearch2/wordparser/parser.h @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */ +/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11.2.1 2009/03/02 15:13:17 teodor Exp $ */ #ifndef __PARSER_H__ #define __PARSER_H__ @@ -138,12 +138,13 @@ typedef struct TParser int lenstr; /* length of mbstring */ #ifdef TS_USE_WIDE wchar_t *wstr; /* wide character string */ + pg_wchar *pgwstr; /* wide character string for C-locale */ int lenwstr; /* length of wsting */ + bool usewide; #endif /* State of parse */ int charmaxlen; - bool usewide; TParserPosition *state; bool ignore; bool wanthost;