static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list, List **extra_clauses);
-static Oid distinct_col_search(int colno, List *colnos, List *opids);
+static DistinctColInfo *distinct_col_search(int colno, List *distinct_cols);
static bool is_innerrel_unique_for(PlannerInfo *root,
Relids joinrelids,
Relids outerrelids,
{
Index relid = rel->relid;
Query *subquery = root->simple_rte_array[relid]->subquery;
- List *colnos = NIL;
- List *opids = NIL;
+ List *distinct_cols = NIL;
ListCell *l;
/*
- * Build the argument lists for query_is_distinct_for: a list of
- * output column numbers that the query needs to be distinct over, and
- * a list of equality operators that the output columns need to be
- * distinct according to.
+ * Build the argument list for query_is_distinct_for: a list of
+ * DistinctColInfo entries, each holding an output column number that
+ * the query needs to be distinct over, the equality operator that the
+ * column needs to be distinct according to, and that operator's input
+ * collation. The collation matters because the subquery's own
+ * DISTINCT / GROUP BY / set-op proves uniqueness under its own
+ * collation, which need not agree with the operator's.
*
* (XXX we are not considering restriction clauses attached to the
* subquery; is that worth doing?)
foreach(l, clause_list)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
- Oid op;
+ OpExpr *opexpr;
Var *var;
+ DistinctColInfo *dcinfo;
/*
- * Get the equality operator we need uniqueness according to.
- * (This might be a cross-type operator and thus not exactly the
- * same operator the subquery would consider; that's all right
- * since query_is_distinct_for can resolve such cases.) The
- * caller's mergejoinability test should have selected only
- * OpExprs.
+ * The caller's mergejoinability test should have selected only
+ * OpExprs. The operator might be a cross-type operator and thus
+ * not exactly the same operator the subquery would consider;
+ * that's all right since query_is_distinct_for can resolve such
+ * cases.
*/
- op = castNode(OpExpr, rinfo->clause)->opno;
+ opexpr = castNode(OpExpr, rinfo->clause);
/* caller identified the inner side for us */
if (rinfo->outer_is_left)
var->varno != relid || var->varlevelsup != 0)
continue;
- colnos = lappend_int(colnos, var->varattno);
- opids = lappend_oid(opids, op);
+ dcinfo = palloc(sizeof(DistinctColInfo));
+ dcinfo->colno = var->varattno;
+ dcinfo->opid = opexpr->opno;
+ dcinfo->collid = opexpr->inputcollid;
+ distinct_cols = lappend(distinct_cols, dcinfo);
}
- if (query_is_distinct_for(subquery, colnos, opids))
+ if (query_is_distinct_for(subquery, distinct_cols))
return true;
}
return false;
* query is a not-yet-planned subquery (in current usage, it's always from
* a subquery RTE, which the planner avoids scribbling on).
*
- * colnos is an integer list of output column numbers (resno's). We are
- * interested in whether rows consisting of just these columns are certain
- * to be distinct. "Distinctness" is defined according to whether the
- * corresponding upper-level equality operators listed in opids would think
- * the values are distinct. (Note: the opids entries could be cross-type
- * operators, and thus not exactly the equality operators that the subquery
- * would use itself. We use equality_ops_are_compatible() to check
- * compatibility. That looks at opfamily membership for index AMs that have
- * declared that they support consistent equality semantics within an
+ * distinct_cols is a list of DistinctColInfo, one per requested output column.
+ * Each entry names the subquery output column number we want distinct, the
+ * upper-level equality operator we'll compare values with, and that operator's
+ * input collation. We are interested in whether rows consisting of just these
+ * columns are certain to be distinct.
+ *
+ * "Distinctness" is defined according to whether the corresponding upper-level
+ * equality operators would think the values are distinct. (Note: each opid
+ * could be a cross-type operator, and thus not exactly the equality operator
+ * that the subquery would use itself. We use equality_ops_are_compatible() to
+ * check compatibility. That looks at opfamily membership for index AMs that
+ * have declared that they support consistent equality semantics within an
* opfamily, and so should give trustworthy answers for all operators that we
* might need to deal with here.)
+ *
+ * The collid must also agree on equality with the collation the subquery's own
+ * DISTINCT/GROUP BY/set-op uses to deduplicate the column, else the subquery's
+ * distinctness does not carry over to the caller's equality semantics. Two
+ * collations agree on equality if they match or if both are deterministic (in
+ * which case both reduce equality to byte-equality; see CREATE COLLATION).
*/
bool
-query_is_distinct_for(Query *query, List *colnos, List *opids)
+query_is_distinct_for(Query *query, List *distinct_cols)
{
ListCell *l;
- Oid opid;
-
- Assert(list_length(colnos) == list_length(opids));
+ DistinctColInfo *dcinfo;
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !equality_ops_are_compatible(opid, sgc->eqop))
+ dcinfo = distinct_col_search(tle->resno, distinct_cols);
+ if (dcinfo == NULL ||
+ !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
+ !collations_agree_on_equality(dcinfo->collid,
+ exprCollation((Node *) tle->expr)))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !equality_ops_are_compatible(opid, sgc->eqop))
+ dcinfo = distinct_col_search(tle->resno, distinct_cols);
+ if (dcinfo == NULL ||
+ !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
+ !collations_agree_on_equality(dcinfo->collid,
+ exprCollation((Node *) tle->expr)))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
sgc = (SortGroupClause *) lfirst(lg);
lg = lnext(topop->groupClauses, lg);
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !equality_ops_are_compatible(opid, sgc->eqop))
+ dcinfo = distinct_col_search(tle->resno, distinct_cols);
+ if (dcinfo == NULL ||
+ !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
+ !collations_agree_on_equality(dcinfo->collid,
+ exprCollation((Node *) tle->expr)))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
/*
* distinct_col_search - subroutine for query_is_distinct_for
*
- * If colno is in colnos, return the corresponding element of opids,
- * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
- * but if it does, we arbitrarily select the first match.)
+ * If colno matches the colno field of an entry in distinct_cols, return a
+ * pointer to that entry; else return NULL. (Ordinarily distinct_cols would
+ * not contain duplicate colnos, but if it does, we arbitrarily select the
+ * first match.)
*/
-static Oid
-distinct_col_search(int colno, List *colnos, List *opids)
+static DistinctColInfo *
+distinct_col_search(int colno, List *distinct_cols)
{
- ListCell *lc1,
- *lc2;
-
- forboth(lc1, colnos, lc2, opids)
+ foreach_ptr(DistinctColInfo, dcinfo, distinct_cols)
{
- if (colno == lfirst_int(lc1))
- return lfirst_oid(lc2);
+ if (dcinfo->colno == colno)
+ return dcinfo;
}
- return InvalidOid;
+
+ return NULL;
}
ghi
(4 rows)
+--
+-- A DISTINCT / GROUP BY / set-op on a subquery does not prove uniqueness
+-- under a different collation, so the planner must not use such a proof for
+-- any optimization.
+--
+-- Ensure that we do not use inner-unique join execution
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT DISTINCT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Sort
+ Output: t1.x, test3cs.x
+ Sort Key: t1.x COLLATE case_sensitive, test3cs.x COLLATE case_sensitive
+ -> Hash Join
+ Output: t1.x, test3cs.x
+ Hash Cond: ((t1.x)::text = (test3cs.x)::text)
+ -> Seq Scan on collate_tests.test1cs t1
+ Output: t1.x
+ -> Hash
+ Output: test3cs.x
+ -> HashAggregate
+ Output: test3cs.x
+ Group Key: test3cs.x
+ -> Seq Scan on collate_tests.test3cs
+ Output: test3cs.x
+(15 rows)
+
+SELECT * FROM test1cs t1, (SELECT DISTINCT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ x | x
+-----+-----
+ abc | abc
+ abc | ABC
+ ABC | abc
+ ABC | ABC
+ def | def
+ ghi | ghi
+(6 rows)
+
+-- Same with GROUP BY
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs GROUP BY x) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Sort
+ Output: t1.x, test3cs.x
+ Sort Key: t1.x COLLATE case_sensitive, test3cs.x COLLATE case_sensitive
+ -> Hash Join
+ Output: t1.x, test3cs.x
+ Hash Cond: ((t1.x)::text = (test3cs.x)::text)
+ -> Seq Scan on collate_tests.test1cs t1
+ Output: t1.x
+ -> Hash
+ Output: test3cs.x
+ -> HashAggregate
+ Output: test3cs.x
+ Group Key: test3cs.x
+ -> Seq Scan on collate_tests.test3cs
+ Output: test3cs.x
+(15 rows)
+
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs GROUP BY x) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ x | x
+-----+-----
+ abc | abc
+ abc | ABC
+ ABC | abc
+ ABC | ABC
+ def | def
+ ghi | ghi
+(6 rows)
+
+-- Same with set-op
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs UNION SELECT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Sort
+ Output: t1.x, test3cs.x
+ Sort Key: t1.x COLLATE case_sensitive, test3cs.x COLLATE case_sensitive
+ -> Hash Join
+ Output: t1.x, test3cs.x
+ Hash Cond: ((test3cs.x)::text = (t1.x)::text)
+ -> HashAggregate
+ Output: test3cs.x
+ Group Key: test3cs.x
+ -> Append
+ -> Seq Scan on collate_tests.test3cs
+ Output: test3cs.x
+ -> Seq Scan on collate_tests.test3cs test3cs_1
+ Output: test3cs_1.x
+ -> Hash
+ Output: t1.x
+ -> Seq Scan on collate_tests.test1cs t1
+ Output: t1.x
+(18 rows)
+
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs UNION SELECT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+ x | x
+-----+-----
+ abc | abc
+ abc | ABC
+ ABC | abc
+ ABC | ABC
+ def | def
+ ghi | ghi
+(6 rows)
+
+-- Ensure that left-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM test3cs t1
+ LEFT JOIN (SELECT DISTINCT x FROM test3cs) t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+ QUERY PLAN
+-----------------------------------------------
+ Sort
+ Sort Key: t1.x COLLATE case_sensitive
+ -> Hash Left Join
+ Hash Cond: (t1.x = (test3cs.x)::text)
+ -> Seq Scan on test3cs t1
+ -> Hash
+ -> HashAggregate
+ Group Key: test3cs.x
+ -> Seq Scan on test3cs
+(9 rows)
+
+SELECT t1.* FROM test3cs t1
+ LEFT JOIN (SELECT DISTINCT x FROM test3cs) t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+ x
+-----
+ abc
+ abc
+ ABC
+ ABC
+ def
+ ghi
+(6 rows)
+
+-- Ensure that semijoin is not reduced to innerjoin
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1
+ WHERE EXISTS (SELECT 1 FROM (SELECT DISTINCT x FROM test3cs) t2
+ WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+ QUERY PLAN
+-------------------------------------------------------
+ Sort
+ Sort Key: t1.x COLLATE case_sensitive
+ -> Hash Semi Join
+ Hash Cond: ((t1.x)::text = (test3cs.x)::text)
+ -> Seq Scan on test3cs t1
+ -> Hash
+ -> HashAggregate
+ Group Key: test3cs.x
+ -> Seq Scan on test3cs
+(9 rows)
+
+SELECT * FROM test3cs t1
+ WHERE EXISTS (SELECT 1 FROM (SELECT DISTINCT x FROM test3cs) t2
+ WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+ x
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
CREATE TABLE test1ci (x text COLLATE case_insensitive);
CREATE TABLE test2ci (x text COLLATE case_insensitive);
CREATE TABLE test3ci (x text COLLATE case_insensitive);
WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive)
ORDER BY 1;
+--
+-- A DISTINCT / GROUP BY / set-op on a subquery does not prove uniqueness
+-- under a different collation, so the planner must not use such a proof for
+-- any optimization.
+--
+
+-- Ensure that we do not use inner-unique join execution
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT DISTINCT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+SELECT * FROM test1cs t1, (SELECT DISTINCT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+-- Same with GROUP BY
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs GROUP BY x) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs GROUP BY x) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+-- Same with set-op
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs UNION SELECT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+SELECT * FROM test1cs t1, (SELECT x FROM test3cs UNION SELECT x FROM test3cs) t2
+WHERE t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1, 2;
+
+-- Ensure that left-join is not removed
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM test3cs t1
+ LEFT JOIN (SELECT DISTINCT x FROM test3cs) t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+
+SELECT t1.* FROM test3cs t1
+ LEFT JOIN (SELECT DISTINCT x FROM test3cs) t2 ON t1.x = t2.x COLLATE case_insensitive
+ORDER BY 1;
+
+-- Ensure that semijoin is not reduced to innerjoin
+EXPLAIN (COSTS OFF)
+SELECT * FROM test3cs t1
+ WHERE EXISTS (SELECT 1 FROM (SELECT DISTINCT x FROM test3cs) t2
+ WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+
+SELECT * FROM test3cs t1
+ WHERE EXISTS (SELECT 1 FROM (SELECT DISTINCT x FROM test3cs) t2
+ WHERE t1.x = t2.x COLLATE case_insensitive)
+ORDER BY 1;
+
CREATE TABLE test1ci (x text COLLATE case_insensitive);
CREATE TABLE test2ci (x text COLLATE case_insensitive);
CREATE TABLE test3ci (x text COLLATE case_insensitive);