]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix missed checks for hashability of container-type equality.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jun 2026 15:48:07 +0000 (11:48 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jun 2026 15:48:16 +0000 (11:48 -0400)
The operators for array_eq, record_eq, range_eq, and multirange_eq
are all marked oprcanhash, but there's a pitfall: their hash functions
can fail at runtime if the contained type(s) are not hashable.
Therefore, the planner has to check hashability of the contained types
before deciding it can use hashing in these cases.  Not every place
had gotten this memo, and noplace at all had considered the issue
for ranges or multiranges.  In particular we could attempt to use
hashing for a ScalarArrayOpExpr on a container type when it won't
actually work, leading to "could not identify a hash function ..."
runtime failures.

For the most part we should fix this in the lookup functions provided
by lsyscache.c, to wit get_op_hash_functions and op_hashjoinable.
But there's a problem: get_op_hash_functions is not passed the input
data type it would need to check.  We mustn't change the API of that
exported function in a back-patched fix, and even if we wanted to,
its call sites in the executor mostly don't have easy access to the
required data type OID.  Fortunately, the executor call sites don't
actually need fixing, because it's expected that the planner verified
hashability before building a plan that requires it.  Therefore,
leave get_op_hash_functions as-is and invent a wrapper function
get_op_hash_functions_ext that does the additional checking needed
in the planner's uses.

We also need to fix hash_ok_operator (extending the fix in 647889667).

While at it, neaten up a couple of places in lookup_type_cache where
relevant code for multirange cases was written differently from the
code for other container types.

Note: while this touches pg_operator.dat, it's only to add oid_symbol
macros.  So there's no on-disk data change and no need for a
catversion bump.

Reported-by: Andrei Lepikhov <lepihov@gmail.com>
Author: Andrei Lepikhov <lepihov@gmail.com>
Co-authored-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/ed221f95-f09b-4a9c-b05b-e1fed621ec87@gmail.com
Backpatch-through: 14

src/backend/optimizer/plan/subselect.c
src/backend/optimizer/util/clauses.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c
src/backend/utils/cache/typcache.c
src/include/catalog/pg_operator.dat
src/include/utils/lsyscache.h

index ccec1eaa7fe16ad574307155fcb07ab635192a96..6aa8971c95d0d74544efce99d0d7ef5ba2a85163 100644 (file)
@@ -841,7 +841,9 @@ hash_ok_operator(OpExpr *expr)
        if (list_length(expr->args) != 2)
                return false;
        if (opid == ARRAY_EQ_OP ||
-               opid == RECORD_EQ_OP)
+               opid == RECORD_EQ_OP ||
+               opid == RANGE_EQ_OP ||
+               opid == MULTIRANGE_EQ_OP)
        {
                /* these are strict, but must check input type to ensure hashable */
                Node       *leftarg = linitial(expr->args);
index cd86311bb0b6cca1547e69770612fa4790336cc5..07738894d1af138860e6824c8cc3d6df53d752f8 100644 (file)
@@ -2544,7 +2544,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
        if (IsA(node, ScalarArrayOpExpr))
        {
                ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
-               Expr       *arrayarg = (Expr *) lsecond(saop->args);
+               Node       *leftarg = (Node *) linitial(saop->args);
+               Node       *arrayarg = (Node *) lsecond(saop->args);
                Oid                     lefthashfunc;
                Oid                     righthashfunc;
 
@@ -2553,7 +2554,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
                {
                        if (saop->useOr)
                        {
-                               if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
+                               if (get_op_hash_functions_ext(saop->opno, exprType(leftarg),
+                                                                                         &lefthashfunc, &righthashfunc) &&
                                        lefthashfunc == righthashfunc)
                                {
                                        Datum           arrdatum = ((Const *) arrayarg)->constvalue;
@@ -2585,7 +2587,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
                                 * just ensure the lookup items are not in the hash table.
                                 */
                                if (OidIsValid(negator) &&
-                                       get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) &&
+                                       get_op_hash_functions_ext(negator, exprType(leftarg),
+                                                                                         &lefthashfunc, &righthashfunc) &&
                                        lefthashfunc == righthashfunc)
                                {
                                        Datum           arrdatum = ((Const *) arrayarg)->constvalue;
index b9449b4574a3580381fef68f4f7fe369d4499268..d6efd07073a1e6e4a7dc827fc5aa6b98d7de2a17 100644 (file)
@@ -2476,7 +2476,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
                 * hash functions for the join operator.
                 */
                if ((sslot1.nvalues + sslot2.nvalues) >= EQJOINSEL_MCV_HASH_THRESHOLD)
-                       (void) get_op_hash_functions(operator, &hashLeft, &hashRight);
+                       (void) get_op_hash_functions_ext(operator,
+                                                                                        exprType((Node *) linitial(args)),
+                                                                                        &hashLeft, &hashRight);
        }
        else
                memset(&eqproc, 0, sizeof(eqproc)); /* silence uninit-var warnings */
index 036086057d78402bbd839978f190b20413f8d118..036de5f79ef9a77022731f271ac61aec23f57e8d 100644 (file)
@@ -472,6 +472,12 @@ get_mergejoin_opfamilies(Oid opno)
  *
  * Returns true if able to find the requested operator(s), false if not.
  * (This indicates that the operator should not have been marked oprcanhash.)
+ *
+ * Callers must beware that for container types (arrays, records, ranges)
+ * this function will succeed for array_eq etc, but the hash function could
+ * fail at runtime if the contained type(s) are not hashable.  If it is
+ * possible that the operator is one of these, precheck with op_hashjoinable
+ * or get_op_hash_functions_ext.
  */
 bool
 get_compatible_hash_operators(Oid opno,
@@ -572,6 +578,12 @@ get_compatible_hash_operators(Oid opno,
  *
  * Returns true if able to find the requested function(s), false if not.
  * (This indicates that the operator should not have been marked oprcanhash.)
+ *
+ * Callers must beware that for container types (arrays, records, ranges)
+ * this function will succeed for array_eq etc, but the hash function could
+ * fail at runtime if the contained type(s) are not hashable.  If it is
+ * possible that the operator is one of these, use get_op_hash_functions_ext
+ * or precheck with op_hashjoinable.
  */
 bool
 get_op_hash_functions(Oid opno,
@@ -654,6 +666,55 @@ get_op_hash_functions(Oid opno,
        return result;
 }
 
+/*
+ * get_op_hash_functions_ext
+ *             As above, but verify hashability in container-type cases.
+ *
+ * As with op_hashjoinable, assume the left input type is sufficient
+ * to disambiguate container-type cases.
+ */
+bool
+get_op_hash_functions_ext(Oid opno, Oid inputtype,
+                                                 RegProcedure *lhs_procno, RegProcedure *rhs_procno)
+{
+       TypeCacheEntry *typentry;
+
+       /* Ensure output args are initialized on failure */
+       if (lhs_procno)
+               *lhs_procno = InvalidOid;
+       if (rhs_procno)
+               *rhs_procno = InvalidOid;
+
+       /* As in op_hashjoinable, let the typcache handle the hard cases */
+       if (opno == ARRAY_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc != F_HASH_ARRAY)
+                       return false;
+       }
+       else if (opno == RECORD_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc != F_HASH_RECORD)
+                       return false;
+       }
+       else if (opno == RANGE_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc != F_HASH_RANGE)
+                       return false;
+       }
+       else if (opno == MULTIRANGE_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc != F_HASH_MULTIRANGE)
+                       return false;
+       }
+
+       /* OK, do the normal lookup */
+       return get_op_hash_functions(opno, lhs_procno, rhs_procno);
+}
+
 /*
  * get_op_index_interpretation
  *             Given an operator's OID, find out which amcanorder opfamilies it belongs to,
@@ -1624,7 +1685,8 @@ op_mergejoinable(Oid opno, Oid inputtype)
         * For array_eq or record_eq, we can sort if the element or field types
         * are all sortable.  We could implement all the checks for that here, but
         * the typcache already does that and caches the results too, so let's
-        * rely on the typcache.
+        * rely on the typcache.  We do not need similar special cases for ranges
+        * or multiranges, because their subtypes are required to be sortable.
         */
        if (opno == ARRAY_EQ_OP)
        {
@@ -1659,10 +1721,11 @@ op_mergejoinable(Oid opno, Oid inputtype)
  * Returns true if the operator is hashjoinable.  (There must be a suitable
  * hash opfamily entry for this operator if it is so marked.)
  *
- * In some cases (currently only array_eq), hashjoinability depends on the
- * specific input data type the operator is invoked for, so that must be
- * passed as well.  We currently assume that only one input's type is needed
- * to check this --- by convention, pass the left input's data type.
+ * In some cases (currently array_eq, record_eq, range_eq, multirange_eq),
+ * hashjoinability depends on the specific input data type the operator is
+ * invoked for, so that must be passed as well.  We currently assume that only
+ * one input's type is needed to check this --- by convention, pass the left
+ * input's data type.
  */
 bool
 op_hashjoinable(Oid opno, Oid inputtype)
@@ -1684,6 +1747,18 @@ op_hashjoinable(Oid opno, Oid inputtype)
                if (typentry->hash_proc == F_HASH_RECORD)
                        result = true;
        }
+       else if (opno == RANGE_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc == F_HASH_RANGE)
+                       result = true;
+       }
+       else if (opno == MULTIRANGE_EQ_OP)
+       {
+               typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+               if (typentry->hash_proc == F_HASH_MULTIRANGE)
+                       result = true;
+       }
        else
        {
                /* For all other operators, rely on pg_operator.oprcanhash */
index cebe7a916fbf927e686059d2af36bb9fdb80bfd4..da91a2ff1dd8872aad4a8e8446846fcfe9bb3d04 100644 (file)
@@ -779,8 +779,9 @@ lookup_type_cache(Oid type_id, int flags)
                                                                                  HASHSTANDARD_PROC);
 
                /*
-                * As above, make sure hash_array, hash_record, or hash_range will
-                * succeed.
+                * As above, make sure hash_array, hash_record, hash_range, or
+                * hash_multirange will succeed.  Here we do need to check the range
+                * cases.
                 */
                if (hash_proc == F_HASH_ARRAY &&
                        !array_element_has_hashing(typentry))
@@ -791,12 +792,8 @@ lookup_type_cache(Oid type_id, int flags)
                else if (hash_proc == F_HASH_RANGE &&
                                 !range_element_has_hashing(typentry))
                        hash_proc = InvalidOid;
-
-               /*
-                * Likewise for hash_multirange.
-                */
-               if (hash_proc == F_HASH_MULTIRANGE &&
-                       !multirange_element_has_hashing(typentry))
+               else if (hash_proc == F_HASH_MULTIRANGE &&
+                                !multirange_element_has_hashing(typentry))
                        hash_proc = InvalidOid;
 
                /* Force update of hash_proc_finfo only if we're changing state */
@@ -828,8 +825,8 @@ lookup_type_cache(Oid type_id, int flags)
                                                                                                   HASHEXTENDED_PROC);
 
                /*
-                * As above, make sure hash_array_extended, hash_record_extended, or
-                * hash_range_extended will succeed.
+                * As above, make sure hash_array_extended, hash_record_extended,
+                * hash_range_extended, or hash_multirange_extended will succeed.
                 */
                if (hash_extended_proc == F_HASH_ARRAY_EXTENDED &&
                        !array_element_has_extended_hashing(typentry))
@@ -840,12 +837,8 @@ lookup_type_cache(Oid type_id, int flags)
                else if (hash_extended_proc == F_HASH_RANGE_EXTENDED &&
                                 !range_element_has_extended_hashing(typentry))
                        hash_extended_proc = InvalidOid;
-
-               /*
-                * Likewise for hash_multirange_extended.
-                */
-               if (hash_extended_proc == F_HASH_MULTIRANGE_EXTENDED &&
-                       !multirange_element_has_extended_hashing(typentry))
+               else if (hash_extended_proc == F_HASH_MULTIRANGE_EXTENDED &&
+                                !multirange_element_has_extended_hashing(typentry))
                        hash_extended_proc = InvalidOid;
 
                /* Force update of proc finfo only if we're changing state */
index 1a8fd8b864522a71cf479b73748be68962e66cc8..c7f860c442b3dc4dbf2851867b92c148de4c7701 100644 (file)
   oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' },
 
 # generic range type operators
-{ oid => '3882', descr => 'equal',
+{ oid => '3882', oid_symbol => 'RANGE_EQ_OP', descr => 'equal',
   oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'anyrange',
   oprright => 'anyrange', oprresult => 'bool', oprcom => '=(anyrange,anyrange)',
   oprnegate => '<>(anyrange,anyrange)', oprcode => 'range_eq',
   oprname => '@@', oprleft => 'jsonb', oprright => 'jsonpath',
   oprresult => 'bool', oprcode => 'jsonb_path_match_opr(jsonb,jsonpath)',
   oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
-{ oid => '2860', descr => 'equal',
+{ oid => '2860', oid_symbol => 'MULTIRANGE_EQ_OP', descr => 'equal',
   oprname => '=', oprcanmerge => 't', oprcanhash => 't',
   oprleft => 'anymultirange', oprright => 'anymultirange', oprresult => 'bool',
   oprcom => '=(anymultirange,anymultirange)',
index 8545e67a632baeab93ba6d8d3f98752f81af6e8f..865980cb0f1bc50df17b9fcc9ad61103501e9132 100644 (file)
@@ -86,6 +86,8 @@ extern bool get_compatible_hash_operators(Oid opno,
                                                                                  Oid *lhs_opno, Oid *rhs_opno);
 extern bool get_op_hash_functions(Oid opno,
                                                                  RegProcedure *lhs_procno, RegProcedure *rhs_procno);
+extern bool get_op_hash_functions_ext(Oid opno, Oid inputtype,
+                                                                         RegProcedure *lhs_procno, RegProcedure *rhs_procno);
 extern List *get_op_index_interpretation(Oid opno);
 extern bool equality_ops_are_compatible(Oid opno1, Oid opno2);
 extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2);