git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Implement a solution to the 'Turkish locale downcases I incorrectly'
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 21 Feb 2004 00:35:13 +0000 (00:35 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 21 Feb 2004 00:35:13 +0000 (00:35 +0000)
problem, per previous discussion.  Make some additional changes to
centralize the knowledge of just how identifier downcasing is done,
in hopes of simplifying any future tweaking in this area.

src/backend/commands/define.c
src/backend/commands/functioncmds.c
src/backend/commands/proclang.c
src/backend/parser/keywords.c
src/backend/parser/scan.l
src/backend/parser/scansup.c
src/backend/utils/adt/varlena.c
src/include/commands/defrem.h
src/include/parser/scansup.h
src/pl/plpgsql/src/pl_funcs.c

index 4ac687259da3eedf50c97a65a58952c4b665bfde..68c3248e5418f034f31c5e87e177c6d58142d30f 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84 2003/08/04 02:39:58 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84.4.1 2004/02/21 00:35:13 tgl Exp $
  *
  * DESCRIPTION
  *       The "DefineFoo" routines take the parse tree and pick out the
 #include "catalog/namespace.h"
 #include "commands/defrem.h"
 #include "parser/parse_type.h"
+#include "parser/scansup.h"
 #include "utils/int8.h"
 
 
 /*
- * Translate the input language name to lower case.
+ * Translate the input language name to lower case, and truncate if needed.
  *
- * Output buffer must be NAMEDATALEN long.
+ * Returns a palloc'd string
  */
-void
-case_translate_language_name(const char *input, char *output)
+char *
+case_translate_language_name(const char *input)
 {
-       int                     i;
-
-       MemSet(output, 0, NAMEDATALEN);         /* ensure result Name is
-                                                                                * zero-filled */
-
-       for (i = 0; i < NAMEDATALEN - 1 && input[i]; ++i)
-               output[i] = tolower((unsigned char) input[i]);
+       return downcase_truncate_identifier(input, strlen(input), false);
 }
 
 
index 35ab80c09a7dc1bbf640a0466e60d74493ab1b6b..ce5b2cc2bf647850881f09aa42e5fa4a8af7b577 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38 2003/10/02 06:34:03 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38.2.1 2004/02/21 00:35:13 tgl Exp $
  *
  * DESCRIPTION
  *       These routines take the parse tree and pick out the
@@ -393,7 +393,7 @@ CreateFunction(CreateFunctionStmt *stmt)
        Oid                     prorettype;
        bool            returnsSet;
        char       *language;
-       char            languageName[NAMEDATALEN];
+       char       *languageName;
        Oid                     languageOid;
        Oid                     languageValidator;
        char       *funcname;
@@ -428,7 +428,7 @@ CreateFunction(CreateFunctionStmt *stmt)
                           &as_clause, &language, &volatility, &isStrict, &security);
 
        /* Convert language name to canonical case */
-       case_translate_language_name(language, languageName);
+       languageName = case_translate_language_name(language);
 
        /* Look up the language and validate permissions */
        languageTuple = SearchSysCache(LANGNAME,
index 09325d647ca6351f4e3f508ee29e65456a9e612e..97d7c38a2e91b1143e713c137708afa8a3c86ba3 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51 2003/10/02 06:34:03 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51.2.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 void
 CreateProceduralLanguage(CreatePLangStmt *stmt)
 {
-       char            languageName[NAMEDATALEN];
+       char       *languageName;
        Oid                     procOid,
                                valProcOid;
        Oid                     funcrettype;
        Oid                     typev[FUNC_MAX_ARGS];
+       NameData        langname;
        char            nulls[Natts_pg_language];
        Datum           values[Natts_pg_language];
        Relation        rel;
@@ -66,7 +67,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
         * Translate the language name and check that this language doesn't
         * already exist
         */
-       case_translate_language_name(stmt->plname, languageName);
+       languageName = case_translate_language_name(stmt->plname);
 
        if (SearchSysCacheExists(LANGNAME,
                                                         PointerGetDatum(languageName),
@@ -124,12 +125,13 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
        }
 
        i = 0;
-       values[i++] = PointerGetDatum(languageName);
-       values[i++] = BoolGetDatum(true);       /* lanispl */
-       values[i++] = BoolGetDatum(stmt->pltrusted);
-       values[i++] = ObjectIdGetDatum(procOid);
-       values[i++] = ObjectIdGetDatum(valProcOid);
-       nulls[i] = 'n';                         /* lanacl */
+       namestrcpy(&langname, languageName);
+       values[i++] = NameGetDatum(&langname);                  /* lanname */
+       values[i++] = BoolGetDatum(true);                               /* lanispl */
+       values[i++] = BoolGetDatum(stmt->pltrusted);    /* lanpltrusted */
+       values[i++] = ObjectIdGetDatum(procOid);                /* lanplcallfoid */
+       values[i++] = ObjectIdGetDatum(valProcOid);             /* lanvalidator */
+       nulls[i] = 'n';                                                                 /* lanacl */
 
        rel = heap_openr(LanguageRelationName, RowExclusiveLock);
 
@@ -173,7 +175,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
 void
 DropProceduralLanguage(DropPLangStmt *stmt)
 {
-       char            languageName[NAMEDATALEN];
+       char       *languageName;
        HeapTuple       langTup;
        ObjectAddress object;
 
@@ -189,7 +191,7 @@ DropProceduralLanguage(DropPLangStmt *stmt)
         * Translate the language name, check that this language exist and is
         * a PL
         */
-       case_translate_language_name(stmt->plname, languageName);
+       languageName = case_translate_language_name(stmt->plname);
 
        langTup = SearchSysCache(LANGNAME,
                                                         CStringGetDatum(languageName),
index c4048b4c1d89d770f6e12c88540f0d620bf23c83..f4f454715c613d86cd6e54c48aa55131d2688020 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141 2003/08/04 02:40:01 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141.4.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -365,17 +365,13 @@ ScanKeywordLookup(const char *text)
 
        /*
         * Apply an ASCII-only downcasing.      We must not use tolower() since it
-        * may produce the wrong translation in some locales (eg, Turkish),
-        * and we don't trust isupper() very much either.  In an ASCII-based
-        * encoding the tests against A and Z are sufficient, but we also
-        * check isupper() so that we will work correctly under EBCDIC.  The
-        * actual case conversion step should work for either ASCII or EBCDIC.
+        * may produce the wrong translation in some locales (eg, Turkish).
         */
        for (i = 0; i < len; i++)
        {
                char            ch = text[i];
 
-               if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
+               if (ch >= 'A' && ch <= 'Z')
                        ch += 'a' - 'A';
                word[i] = ch;
        }
index b10d4531851eff878a895e2fd43a08b705d7093d..c3a423a7f4a320fdb9e5c673b9aacd737d7f9114 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111 2003/10/09 19:13:23 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111.2.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,6 +26,7 @@
 #include "parser/keywords.h"
 /* Not needed now that this file is compiled as part of gram.y */
 /* #include "parser/parse.h" */
+#include "parser/scansup.h"
 #include "utils/builtins.h"
 #include "mb/pg_wchar.h"
 
@@ -394,23 +395,15 @@ other                     .
                                        startlit();
                                }
 <xd>{xdstop}   {
+                                       char               *ident;
+
                                        BEGIN(INITIAL);
                                        if (literallen == 0)
                                                yyerror("zero-length delimited identifier");
+                                       ident = litbufdup();
                                        if (literallen >= NAMEDATALEN)
-                                       {
-                                               int len;
-
-                                               len = pg_mbcliplen(literalbuf, literallen,
-                                                                                  NAMEDATALEN-1);
-                                               ereport(NOTICE,
-                                                               (errcode(ERRCODE_NAME_TOO_LONG),
-                                                                errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
-                                                                               literalbuf, len, literalbuf)));
-                                               literalbuf[len] = '\0';
-                                               literallen = len;
-                                       }
-                                       yylval.str = litbufdup();
+                                               truncate_identifier(ident, literallen, true);
+                                       yylval.str = ident;
                                        return IDENT;
                                }
 <xd>{xddouble} {
@@ -532,7 +525,6 @@ other                       .
 {identifier}   {
                                        const ScanKeyword *keyword;
                                        char               *ident;
-                                       int                             i;
 
                                        /* Is it a keyword? */
                                        keyword = ScanKeywordLookup(yytext);
@@ -545,28 +537,8 @@ other                      .
                                        /*
                                         * No.  Convert the identifier to lower case, and truncate
                                         * if necessary.
-                                        *
-                                        * Note: here we use a locale-dependent case conversion,
-                                        * which seems appropriate under standard SQL rules, whereas
-                                        * the keyword comparison was NOT locale-dependent.
                                         */
-                                       ident = pstrdup(yytext);
-                                       for (i = 0; ident[i]; i++)
-                                       {
-                                               if (isupper((unsigned char) ident[i]))
-                                                       ident[i] = tolower((unsigned char) ident[i]);
-                                       }
-                                       if (i >= NAMEDATALEN)
-                    {
-                                               int len;
-
-                                               len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
-                                               ereport(NOTICE,
-                                                               (errcode(ERRCODE_NAME_TOO_LONG),
-                                                                errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
-                                                                               ident, len, ident)));
-                                               ident[len] = '\0';
-                    }
+                                       ident = downcase_truncate_identifier(yytext, yyleng, true);
                                        yylval.str = ident;
                                        return IDENT;
                                }
index e00d284edcc81cd615c51b8e575e764e261a466b..14467dcdc775ec8192b83b77913d6c2eeb1fe17b 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24 2003/08/04 02:40:02 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24.4.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,8 @@
 
 #include "miscadmin.h"
 #include "parser/scansup.h"
+#include "mb/pg_wchar.h"
+
 
 /* ----------------
  *             scanstr
@@ -32,7 +34,7 @@
  */
 
 char *
-scanstr(char *s)
+scanstr(const char *s)
 {
        char       *newStr;
        int                     len,
@@ -109,3 +111,75 @@ scanstr(char *s)
        newStr[j] = '\0';
        return newStr;
 }
+
+
+/*
+ * downcase_truncate_identifier() --- do appropriate downcasing and
+ * truncation of an unquoted identifier.  Optionally warn of truncation.
+ *
+ * Returns a palloc'd string containing the adjusted identifier.
+ *
+ * Note: in some usages the passed string is not null-terminated.
+ *
+ * Note: the API of this function is designed to allow for downcasing
+ * transformations that increase the string length, but we don't yet
+ * support that.  If you want to implement it, you'll need to fix
+ * SplitIdentifierString() in utils/adt/varlena.c.
+ */
+char *
+downcase_truncate_identifier(const char *ident, int len, bool warn)
+{
+       char       *result;
+       int                     i;
+
+       result = palloc(len + 1);
+       /*
+        * SQL99 specifies Unicode-aware case normalization, which we don't yet
+        * have the infrastructure for.  Instead we use tolower() to provide a
+        * locale-aware translation.  However, there are some locales where this
+        * is not right either (eg, Turkish may do strange things with 'i' and
+        * 'I').  Our current compromise is to use tolower() for characters with
+        * the high bit set, and use an ASCII-only downcasing for 7-bit
+        * characters.
+        */
+       for (i = 0; i < len; i++)
+       {
+               unsigned char   ch = (unsigned char) ident[i];
+
+               if (ch >= 'A' && ch <= 'Z')
+                       ch += 'a' - 'A';
+               else if (ch >= 0x80 && isupper(ch))
+                       ch = tolower(ch);
+               result[i] = (char) ch;
+       }
+       result[i] = '\0';
+
+       if (i >= NAMEDATALEN)
+               truncate_identifier(result, i, warn);
+
+       return result;
+}
+
+/*
+ * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
+ *
+ * The given string is modified in-place, if necessary.  A warning is
+ * issued if requested.
+ *
+ * We require the caller to pass in the string length since this saves a
+ * strlen() call in some common usages.
+ */
+void
+truncate_identifier(char *ident, int len, bool warn)
+{
+       if (len >= NAMEDATALEN)
+       {
+               len = pg_mbcliplen(ident, len, NAMEDATALEN-1);
+               if (warn)
+                       ereport(NOTICE,
+                                       (errcode(ERRCODE_NAME_TOO_LONG),
+                                        errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
+                                                       ident, len, ident)));
+               ident[len] = '\0';
+       }
+}
index caf0250e886a93b1527e319c3e4c90260c3b9a5c..7b17b50aec46cc23db5b3182c6ef88f03021939d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.3 2004/01/31 00:45:34 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.4 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include <ctype.h>
 
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
 #include "access/tuptoaster.h"
 #include "catalog/pg_type.h"
 #include "lib/stringinfo.h"
 #include "libpq/crypt.h"
 #include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/scansup.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
-#include "utils/pg_locale.h"
 #include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
 
 
 typedef struct varlena unknown;
@@ -1681,7 +1682,6 @@ SplitIdentifierString(char *rawstring, char separator,
        {
                char       *curname;
                char       *endp;
-               int                     curlen;
 
                if (*nextp == '\"')
                {
@@ -1704,21 +1704,30 @@ SplitIdentifierString(char *rawstring, char separator,
                else
                {
                        /* Unquoted name --- extends to separator or whitespace */
+                       char       *downname;
+                       int                     len;
+
                        curname = nextp;
                        while (*nextp && *nextp != separator &&
                                   !isspace((unsigned char) *nextp))
-                       {
-                               /*
-                                * It's important that this match the identifier
-                                * downcasing code used by backend/parser/scan.l.
-                                */
-                               if (isupper((unsigned char) *nextp))
-                                       *nextp = tolower((unsigned char) *nextp);
                                nextp++;
-                       }
                        endp = nextp;
                        if (curname == nextp)
                                return false;   /* empty unquoted name not allowed */
+                       /*
+                        * Downcase the identifier, using same code as main lexer does.
+                        *
+                        * XXX because we want to overwrite the input in-place, we cannot
+                        * support a downcasing transformation that increases the
+                        * string length.  This is not a problem given the current
+                        * implementation of downcase_truncate_identifier, but we'll
+                        * probably have to do something about this someday.
+                        */
+                       len = endp - curname;
+                       downname = downcase_truncate_identifier(curname, len, false);
+                       Assert(strlen(downname) <= len);
+                       strncpy(curname, downname, len);
+                       pfree(downname);
                }
 
                while (isspace((unsigned char) *nextp))
@@ -1739,13 +1748,8 @@ SplitIdentifierString(char *rawstring, char separator,
                /* Now safe to overwrite separator with a null */
                *endp = '\0';
 
-               /* Truncate name if it's overlength; again, should match scan.l */
-               curlen = strlen(curname);
-               if (curlen >= NAMEDATALEN)
-               {
-                       curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
-                       curname[curlen] = '\0';
-               }
+               /* Truncate name if it's overlength */
+               truncate_identifier(curname, strlen(curname), false);
 
                /*
                 * Finished isolating current name --- add it to list
index e192c868fa037f2cdc32f1c135eb39d9391f11cc..f6a5da4531d2dfac1c22651527fd27c04520260d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: defrem.h,v 1.52 2003/08/04 02:40:13 momjian Exp $
+ * $Id: defrem.h,v 1.52.4.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,7 +61,7 @@ extern void RenameOpClass(List *name, const char *access_method, const char *new
 
 /* support routines in commands/define.c */
 
-extern void case_translate_language_name(const char *input, char *output);
+extern char *case_translate_language_name(const char *input);
 
 extern char *defGetString(DefElem *def);
 extern double defGetNumeric(DefElem *def);
index 12b8794d28d51fbb2223c3009b3cd882fb5eb450..ef4e1179a99caf45eb03a8fa682231bd7ec95a4b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: scansup.h,v 1.13 2003/08/04 02:40:14 momjian Exp $
+ * $Id: scansup.h,v 1.13.4.1 2004/02/21 00:35:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef SCANSUP_H
 #define SCANSUP_H
 
-extern char *scanstr(char *s);
+extern char *scanstr(const char *s);
+
+extern char *downcase_truncate_identifier(const char *ident, int len,
+                                                                                 bool warn);
+
+extern void truncate_identifier(char *ident, int len, bool warn);
 
 #endif   /* SCANSUP_H */
index c47da263099589a669ca8942b2d0bb346cc48e87..1f1e0f389107d195e227b70f82446fe5d9145106 100644 (file)
@@ -3,7 +3,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30 2003/09/25 23:02:12 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30.2.1 2004/02/21 00:35:13 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -40,7 +40,7 @@
 
 #include <ctype.h>
 
-#include "mb/pg_wchar.h"
+#include "parser/scansup.h"
 
 
 /* ----------
@@ -348,15 +348,15 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
        {
                char       *curident;
                char       *cp;
-               int                     i;
 
                /* Process current identifier */
-               curident = palloc(strlen(s) + 1);               /* surely enough room */
-               cp = curident;
 
                if (*s == '"')
                {
                        /* Quoted identifier: copy, collapsing out doubled quotes */
+
+                       curident = palloc(strlen(s) + 1); /* surely enough room */
+                       cp = curident;
                        s++;
                        while (*s)
                        {
@@ -373,35 +373,20 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
                                                (errcode(ERRCODE_SYNTAX_ERROR),
                                                 errmsg("unterminated \" in name: %s", sstart)));
                        s++;
+                       *cp = '\0';
+                       /* Truncate to NAMEDATALEN */
+                       truncate_identifier(curident, cp-curident, false);
                }
                else
                {
-                       /*
-                        * Normal identifier: downcase, stop at dot or whitespace.
-                        *
-                        * Note that downcasing is locale-sensitive, following SQL99
-                        * rules for identifiers.  We have already decided that the
-                        * item is not a PLPGSQL keyword.
-                        */
-                       while (*s && *s != '.' && !isspace((unsigned char) *s))
-                       {
-                               if (isupper((unsigned char) *s))
-                                       *cp++ = tolower((unsigned char) *s++);
-                               else
-                                       *cp++ = *s++;
-                       }
-               }
-
-               /* Truncate to NAMEDATALEN */
-               *cp = '\0';
-               i = cp - curident;
-
-               if (i >= NAMEDATALEN)
-               {
-                       int                     len;
+                       /* Normal identifier: extends till dot or whitespace */
+                       const char *thisstart = s;
 
-                       len = pg_mbcliplen(curident, i, NAMEDATALEN - 1);
-                       curident[len] = '\0';
+                       while (*s && *s != '.' && !isspace((unsigned char) *s))
+                               s++;
+                       /* Downcase and truncate to NAMEDATALEN */
+                       curident = downcase_truncate_identifier(thisstart, s-thisstart,
+                                                                                                       false);
                }
 
                /* Pass ident to caller */