Consider collation when proving uniqueness from unique indexes

author Richard Guo <rguo@postgresql.org>
Tue, 5 May 2026 01:28:24 +0000 (10:28 +0900)

committer Richard Guo <rguo@postgresql.org>
Tue, 5 May 2026 01:28:24 +0000 (10:28 +0900)

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c

index 534b1d0c8a4f9efb4a1921ea38ea029e61781b4a..afb9df4763d033624c7d8ea25413dbe6bcbb4ae5 100644 (file)
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -3536,16 +3536,19 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
                                  * The condition's equality operator must be a member of the
                                  * index opfamily, else it is not asserting the right kind of
                                  * equality behavior for this index.  We check this first
-                                * since it's probably cheaper than match_index_to_operand().
+                                * since it's probably the cheapest test.
                                  */
                                 if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c]))
                                         continue;
  
                                 /*
-                                * XXX at some point we may need to check collations here too.
-                                * For the moment we assume all collations reduce to the same
-                                * notion of equality.
+                                * The index's collation must agree with the clause's input
+                                * collation on equality, else the index's uniqueness does not
+                                * imply uniqueness under the clause's equality semantics.
                                  */
+                               if (!collations_agree_on_equality(ind->indexcollations[c],
+                                                                                                 exprInputCollation((Node *) rinfo->clause)))
+                                       continue;
  
                                 /* OK, see if the condition operand matches the index key */
                                 if (rinfo->outer_is_left)
@@ -3583,10 +3586,13 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
                                         continue;
  
                                 /*
-                                * XXX at some point we may need to check collations here too.
-                                * For the moment we assume all collations reduce to the same
-                                * notion of equality.
+                                * The index's collation must agree with the operand's
+                                * collation on equality, else the index's uniqueness does not
+                                * imply uniqueness under the operator's equality semantics.
                                  */
+                               if (!collations_agree_on_equality(ind->indexcollations[c],
+                                                                                                 exprCollation(expr)))
+                                       continue;
  
                                 matched = true; /* column is unique */
                                 break;
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 48a280d089b70d1606240dd3357ea9afeba5c918..85bc6fdb6f0c036fb616336c1dd9f0d24d61ed79 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -782,6 +782,44 @@ comparison_ops_are_compatible(Oid opno1, Oid opno2)
         return result;
  }
  
+/*
+ * collations_agree_on_equality
+ *             Report whether two collations define the same equality relation,
+ *             i.e. whether a uniqueness or equality proof made under one of
+ *             them remains valid for a comparison done under the other.
+ *
+ * Note: only equality is covered here.  Nothing is implied about ordering,
+ * so do NOT use this to reason about sort order.
+ *
+ * InvalidOid on either side means that side has no collation, i.e. its
+ * operation is not collation-sensitive (e.g. a non-collatable column type).
+ * A missing collation cannot conflict with whatever the other side uses, so
+ * such pairs are reported as agreeing.  This generalizes the asymmetric
+ * treatment in IndexCollMatchesExprColl().
+ *
+ * Two valid collations agree if they are the same collation, or if both are
+ * deterministic: a deterministic collation by definition considers two
+ * strings equal iff they are byte-wise equal (see CREATE COLLATION), hence
+ * all deterministic collations share one equality relation.  When the
+ * collations differ and at least one is nondeterministic, their equality
+ * relations may disagree, so a proof cannot be carried over.
+ */
+bool
+collations_agree_on_equality(Oid coll1, Oid coll2)
+{
+       /* No collation on one side: nothing to conflict with */
+       if (!(OidIsValid(coll1) && OidIsValid(coll2)))
+               return true;
+
+       /* A collation trivially agrees with itself */
+       if (coll1 == coll2)
+               return true;
+
+       /* Distinct collations agree only if both are deterministic */
+       return get_collation_isdeterministic(coll1) &&
+               get_collation_isdeterministic(coll2);
+}
+
  
  /*                             ---------- AMPROC CACHES ----------                                              */
  
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index 20446f6f8368d58e836e41b46a118b4b534b3ae8..3f952ddf8641af4cfc5bcf98a3165f1cc1908250 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -86,6 +86,7 @@ extern bool get_op_hash_functions(Oid opno,
  extern List *get_op_btree_interpretation(Oid opno);
  extern bool equality_ops_are_compatible(Oid opno1, Oid opno2);
  extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2);
+extern bool collations_agree_on_equality(Oid coll1, Oid coll2);
  extern Oid     get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype,
                                                           int16 procnum);
  extern char *get_attname(Oid relid, AttrNumber attnum, bool missing_ok);
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out

index 2360fcac90ff7b02e05376a01f79988d78d567b7..75d367389cc792b53a427d7860e23826ba98604f 100644 (file)
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1489,6 +1489,129 @@ SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
   {A,B,C,D,E,F,G,H,I}
  (1 row)
  
+--
+-- A unique index under one collation does not prove uniqueness under
+-- another, so the planner must not use such a proof for any optimization.
+--
+-- Ensure that we do not use inner-unique join execution
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Sort
+   Output: t1.x, t2.x
+   Sort Key: t1.x COLLATE case_sensitive, t2.x COLLATE case_sensitive
+   ->  Hash Join
+         Output: t1.x, t2.x
+         Hash Cond: ((t2.x)::text = (t1.x)::text)
+         ->  Seq Scan on collate_tests.test3cs t2
+               Output: t2.x
+         ->  Hash
+               Output: t1.x
+               ->  Seq Scan on collate_tests.test1cs t1
+                     Output: t1.x
+(12 rows)
+
+SELECT * FROM test1cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+  x  |  x  
+-----+-----
+ abc | abc
+ abc | ABC
+ ABC | abc
+ ABC | ABC
+ def | def
+ ghi | ghi
+(6 rows)
+
+-- Ensure that left-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM test3cs t1
+       LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+                QUERY PLAN                
+------------------------------------------
+ Sort
+   Sort Key: t1.x COLLATE case_sensitive
+   ->  Hash Left Join
+         Hash Cond: (t1.x = (t2.x)::text)
+         ->  Seq Scan on test3cs t1
+         ->  Hash
+               ->  Seq Scan on test3cs t2
+(7 rows)
+
+SELECT t1.* FROM test3cs t1
+       LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+  x  
+-----
+ abc
+ abc
+ ABC
+ ABC
+ def
+ ghi
+(6 rows)
+
+-- Ensure that self-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x COLLATE case_sensitive, t2.x COLLATE case_sensitive
+   ->  Hash Join
+         Hash Cond: ((t1.x)::text = (t2.x)::text)
+         ->  Seq Scan on test3cs t1
+         ->  Hash
+               ->  Seq Scan on test3cs t2
+(7 rows)
+
+SELECT * FROM test3cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+  x  |  x  
+-----+-----
+ abc | abc
+ abc | ABC
+ ABC | abc
+ ABC | ABC
+ def | def
+ ghi | ghi
+(6 rows)
+
+-- Ensure that semijoin is not reduced to innerjoin
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1
+  WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sort
+   Sort Key: t1.x COLLATE case_sensitive
+   ->  Hash Semi Join
+         Hash Cond: ((t1.x)::text = (t2.x)::text)
+         ->  Seq Scan on test3cs t1
+         ->  Hash
+               ->  Seq Scan on test3cs t2
+(7 rows)
+
+SELECT * FROM test3cs t1
+  WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
  CREATE TABLE test1ci (x text COLLATE case_insensitive);
  CREATE TABLE test2ci (x text COLLATE case_insensitive);
  CREATE TABLE test3ci (x text COLLATE case_insensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql

index c797b5181be589fa11a03344daad54e91b8182a3..0c2733fe1d191e99a2d81db6c9097b790bb63e03 100644 (file)
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -572,6 +572,51 @@ CREATE UNIQUE INDEX ON test3cs (x);  -- ok
  SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc');
  SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
  
+--
+-- A unique index under one collation does not prove uniqueness under
+-- another, so the planner must not use such a proof for any optimization.
+--
+
+-- Ensure that we do not use inner-unique join execution
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+SELECT * FROM test1cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+-- Ensure that left-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM test3cs t1
+       LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+
+SELECT t1.* FROM test3cs t1
+       LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+
+-- Ensure that self-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+SELECT * FROM test3cs t1, test3cs t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+-- Ensure that semijoin is not reduced to innerjoin
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1
+  WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+
+SELECT * FROM test3cs t1
+  WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+
  CREATE TABLE test1ci (x text COLLATE case_insensitive);
  CREATE TABLE test2ci (x text COLLATE case_insensitive);
  CREATE TABLE test3ci (x text COLLATE case_insensitive);
author	Richard Guo <rguo@postgresql.org>
	Tue, 5 May 2026 01:28:24 +0000 (10:28 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Tue, 5 May 2026 01:28:24 +0000 (10:28 +0900)
src/backend/optimizer/path/indxpath.c		patch \| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| blame \| history
src/include/utils/lsyscache.h		patch \| blob \| blame \| history
src/test/regress/expected/collate.icu.utf8.out		patch \| blob \| blame \| history
src/test/regress/sql/collate.icu.utf8.sql		patch \| blob \| blame \| history