*/
#include "postgres.h"
+#include "access/htup_details.h"
#include "access/parallel.h"
#include "common/hashfn.h"
#include "executor/executor.h"
MemoryContextReset(hashtable->tuplescxt);
}
+/*
+ * Estimate how much memory a TupleHashTable will consume for nentries
+ * entries, assuming tuples of average data width tupleWidth plus
+ * additionalsize bytes of caller-requested space per entry.
+ *
+ * Returns SIZE_MAX if the total would overflow size_t.
+ *
+ * nentries is a "double" since the intended caller is the planner, which
+ * works with double rowcount estimates; the clamp to an integer has to
+ * happen somewhere, and here is as good a place as any.  The value must
+ * not be NaN or negative, else the result will be garbage.
+ */
+Size
+EstimateTupleHashTableSpace(double nentries,
+							Size tupleWidth,
+							Size additionalsize)
+{
+	Size		bucket_bytes;
+	double		entry_bytes;
+
+	/* Ask simplehash how much memory its bucket array will need */
+	bucket_bytes = tuplehash_estimate_space(nentries);
+
+	/* The SIZE_MAX sentinel means the bucket array alone is too big */
+	if (bucket_bytes >= SIZE_MAX)
+		return bucket_bytes;
+
+	/*
+	 * Now add the space consumed by the hashed tuples plus their additional
+	 * data.  nentries is expected to be somewhat sane, so computing this
+	 * product should be safe.
+	 *
+	 * The tuples are presumed to live in a BumpContext, which imposes no
+	 * per-tuple overhead.
+	 *
+	 * (With MEMORY_CONTEXT_CHECKING enabled, bump.c does prepend a
+	 * MemoryChunk header to each tuple, making this an underestimate.
+	 * But it seems undesirable for debug builds to make different planning
+	 * choices than production builds, so we assume the production behavior
+	 * always.)
+	 */
+	entry_bytes = nentries * (MAXALIGN(SizeofMinimalTupleHeader) +
+							  MAXALIGN(tupleWidth) +
+							  MAXALIGN(additionalsize));
+
+	/*
+	 * Guard against size_t overflow.  This test is subtler than it looks:
+	 * on 64-bit hardware SIZE_MAX has no exact double representation, so
+	 * we cast it explicitly (silencing inexact-conversion warnings) and
+	 * rely on any double value comparing strictly below "(double) SIZE_MAX"
+	 * converting to a representable size_t.
+	 */
+	if (bucket_bytes + entry_bytes >= (double) SIZE_MAX)
+		return SIZE_MAX;
+
+	/* Miscellaneous bookkeeping overhead is intentionally not counted */
+	return (Size) (bucket_bytes + entry_bytes);
+}
+
/*
* Find or create a hashtable entry for the tuple group containing the
* given tuple. The tuple must be the same type as the hashtable entries.
false);
}
+/* Planner support routine to estimate space needed for hash table */
+Size
+EstimateSetOpHashTableSpace(double nentries, Size tupleWidth)
+{
+	/* each entry carries a SetOpStatePerGroupData as its extra payload */
+	Size		pergroupsize = sizeof(SetOpStatePerGroupData);
+
+	return EstimateTupleHashTableSpace(nentries, tupleWidth, pergroupsize);
+}
+
/*
* We've completed processing a tuple group. Decide how many copies (if any)
* of its representative row to emit, and store the count into numOutput.
node->tab_hash_funcs,
node->tab_collations,
nbuckets,
- 0,
+ 0, /* no additional data */
node->planstate->state->es_query_cxt,
node->tuplesContext,
innerecontext->ecxt_per_tuple_memory,
node->tab_hash_funcs,
node->tab_collations,
nbuckets,
- 0,
+ 0, /* no additional data */
node->planstate->state->es_query_cxt,
node->tuplesContext,
innerecontext->ecxt_per_tuple_memory,
MemoryContextSwitchTo(oldcontext);
}
+/* Planner support routine to estimate space needed for hash table(s) */
+Size
+EstimateSubplanHashTableSpace(double nentries,
+							  Size tupleWidth,
+							  bool unknownEqFalse)
+{
+	Size		mainspace;
+	Size		nullspace;
+
+	/* Space for the primary hashtable (no additional per-entry data) */
+	mainspace = EstimateTupleHashTableSpace(nentries, tupleWidth, 0);
+
+	/* If that already overflowed, no point in continuing */
+	if (mainspace >= SIZE_MAX)
+		return mainspace;
+
+	/* A hashnulls table is only required when unknownEqFalse is not set */
+	if (unknownEqFalse)
+		return mainspace;
+
+	/*
+	 * Scale down the rowcount estimate the same way buildSubPlanHash will,
+	 * minus its special case for a single hash column.  (Supplying the
+	 * column count here would be notationally painful for our caller, and
+	 * this table contributes relatively little to the total estimate
+	 * anyway.)
+	 */
+	nentries /= 16;
+	if (nentries < 1)
+		nentries = 1;
+
+	/*
+	 * Arguably tupleWidth could be reduced as well, but we also aren't
+	 * charging for the space taken by the tuples' null bitmaps, so leave
+	 * it alone for now.
+	 */
+	nullspace = EstimateTupleHashTableSpace(nentries, tupleWidth, 0);
+
+	/* Avoid overflowing the sum of the two estimates */
+	if (nullspace >= SIZE_MAX - mainspace)
+		return SIZE_MAX;
+
+	return mainspace + nullspace;
+}
+
/*
* execTuplesUnequal
* Return true if two tuples are definitely unequal in the indicated
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
+#include "executor/nodeSubplan.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
List *subst_nodes);
static Node *convert_testexpr_mutator(Node *node,
convert_testexpr_context *context);
-static bool subplan_is_hashable(Plan *plan);
-static bool subpath_is_hashable(Path *path);
+static bool subplan_is_hashable(Plan *plan, bool unknownEqFalse);
+static bool subpath_is_hashable(Path *path, bool unknownEqFalse);
static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
static bool hash_ok_operator(OpExpr *expr);
best_path = final_rel->cheapest_total_path;
/* Now we can check if it'll fit in hash_mem */
- if (subpath_is_hashable(best_path))
+ if (subpath_is_hashable(best_path, true))
{
SubPlan *hashplan;
AlternativeSubPlan *asplan;
*/
if (subLinkType == ANY_SUBLINK &&
splan->parParam == NIL &&
- subplan_is_hashable(plan) &&
+ subplan_is_hashable(plan, unknownEqFalse) &&
testexpr_is_hashable(splan->testexpr, splan->paramIds))
splan->useHashTable = true;
* is suitable for hashing. We only look at the subquery itself.
*/
static bool
-subplan_is_hashable(Plan *plan)
+subplan_is_hashable(Plan *plan, bool unknownEqFalse)
{
- double subquery_size;
+ Size hashtablesize;
/*
- * The estimated size of the subquery result must fit in hash_mem. (Note:
- * we use heap tuple overhead here even though the tuples will actually be
- * stored as MinimalTuples; this provides some fudge factor for hashtable
- * overhead.)
+ * The estimated size of the hashtable holding the subquery result must
+ * fit in hash_mem. (Note: reject on equality, to ensure that an estimate
+ * of SIZE_MAX disables hashing regardless of the hash_mem limit.)
*/
- subquery_size = plan->plan_rows *
- (MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
- if (subquery_size > get_hash_memory_limit())
+ hashtablesize = EstimateSubplanHashTableSpace(plan->plan_rows,
+ plan->plan_width,
+ unknownEqFalse);
+ if (hashtablesize >= get_hash_memory_limit())
return false;
return true;
* Identical to subplan_is_hashable, but work from a Path for the subplan.
*/
static bool
-subpath_is_hashable(Path *path)
+subpath_is_hashable(Path *path, bool unknownEqFalse)
{
- double subquery_size;
+ Size hashtablesize;
/*
- * The estimated size of the subquery result must fit in hash_mem. (Note:
- * we use heap tuple overhead here even though the tuples will actually be
- * stored as MinimalTuples; this provides some fudge factor for hashtable
- * overhead.)
+ * The estimated size of the hashtable holding the subquery result must
+ * fit in hash_mem. (Note: reject on equality, to ensure that an estimate
+ * of SIZE_MAX disables hashing regardless of the hash_mem limit.)
*/
- subquery_size = path->rows *
- (MAXALIGN(path->pathtarget->width) + MAXALIGN(SizeofHeapTupleHeader));
- if (subquery_size > get_hash_memory_limit())
+ hashtablesize = EstimateSubplanHashTableSpace(path->rows,
+ path->pathtarget->width,
+ unknownEqFalse);
+ if (hashtablesize >= get_hash_memory_limit())
return false;
return true;
#include <math.h>
#include "access/htup_details.h"
+#include "executor/nodeSetOp.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/extensible.h"
}
else
{
- Size hashentrysize;
+ Size hashtablesize;
/*
* In hashed mode, we must read all the input before we can emit
/*
* Also disable if it doesn't look like the hashtable will fit into
- * hash_mem.
+ * hash_mem. (Note: reject on equality, to ensure that an estimate of
+ * SIZE_MAX disables hashing regardless of the hash_mem limit.)
*/
- hashentrysize = MAXALIGN(leftpath->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
- if (hashentrysize * numGroups > get_hash_memory_limit())
+ hashtablesize = EstimateSetOpHashTableSpace(numGroups,
+ leftpath->pathtarget->width);
+ if (hashtablesize >= get_hash_memory_limit())
pathnode->path.disabled_nodes++;
}
pathnode->path.rows = outputRows;
ExprState *eqcomp,
ExprState *hashexpr);
extern void ResetTupleHashTable(TupleHashTable hashtable);
+extern Size EstimateTupleHashTableSpace(double nentries,
+ Size tupleWidth,
+ Size additionalsize);
#ifndef FRONTEND
/*
extern void ExecEndSetOp(SetOpState *node);
extern void ExecReScanSetOp(SetOpState *node);
+extern Size EstimateSetOpHashTableSpace(double nentries, Size tupleWidth);
+
#endif /* NODESETOP_H */
extern Datum ExecSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull);
+extern Size EstimateSubplanHashTableSpace(double nentries,
+ Size tupleWidth,
+ bool unknownEqFalse);
+
extern void ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent);
extern void ExecSetParamPlan(SubPlanState *node, ExprContext *econtext);
#define SH_ITERATE SH_MAKE_NAME(iterate)
#define SH_ALLOCATE SH_MAKE_NAME(allocate)
#define SH_FREE SH_MAKE_NAME(free)
+#define SH_ESTIMATE_SPACE SH_MAKE_NAME(estimate_space)
#define SH_STAT SH_MAKE_NAME(stat)
/* internal helper functions (no externally visible prototypes) */
/* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
-/* void <prefix>_stat(<prefix>_hash *tb */
+/* size_t <prefix>_estimate_space(double nentries) */
+SH_SCOPE size_t SH_ESTIMATE_SPACE(double nentries);
+
+/* void <prefix>_stat(<prefix>_hash *tb) */
SH_SCOPE void SH_STAT(SH_TYPE * tb);
#endif /* SH_DECLARE */
/*
* Compute allocation size for hashtable. Result can be passed to
- * SH_UPDATE_PARAMETERS.
+ * SH_UPDATE_PARAMETERS. (Keep SH_ESTIMATE_SPACE in sync with this!)
*/
static inline uint64
SH_COMPUTE_SIZE(uint64 newsize)
return NULL;
}
+/*
+ * Estimate the memory required for a hashtable holding nentries entries;
+ * returns SIZE_MAX if that many entries cannot be supported.
+ *
+ * nentries is a "double" because the expected caller is the planner, which
+ * deals in double rowcount estimates; the clamp to an integer has to occur
+ * somewhere, so it might as well be here.  The value must not be NaN or
+ * negative, else the result will be garbage.
+ */
+SH_SCOPE size_t
+SH_ESTIMATE_SPACE(double nentries)
+{
+	uint64		nbuckets;
+	uint64		data_bytes;
+
+	/* inflate the request by SH_FILLFACTOR, mirroring SH_CREATE */
+	nentries = nentries / SH_FILLFACTOR;
+
+	/* refuse anything that would reach or exceed SH_MAX_SIZE entries */
+	if (nentries >= SH_MAX_SIZE)
+		return SIZE_MAX;
+
+	/* now small enough that converting to uint64 is safe */
+	nbuckets = (uint64) nentries;
+
+	/* zero-sized hashes would complicate matters, so enforce a floor */
+	nbuckets = Max(nbuckets, 2);
+
+	/* bucketing needs a power-of-2 table size, so round up to one */
+	nbuckets = pg_nextpower2_64(nbuckets);
+
+	/* bytes needed for the ->data array */
+	data_bytes = ((uint64) sizeof(SH_ELEMENT_TYPE)) * nbuckets;
+
+	/* verify that allocating ->data is possible on this platform */
+	if (data_bytes >= SIZE_MAX / 2)
+		return SIZE_MAX;
+
+	return (size_t) data_bytes + sizeof(SH_TYPE);
+}
+
/*
* Report some statistics about the state of the hashtable. For
* debugging/profiling purposes only.
#undef SH_ITERATE
#undef SH_ALLOCATE
#undef SH_FREE
+#undef SH_ESTIMATE_SPACE
#undef SH_STAT
/* internal function names */