Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 7714a68090941079bc7536a35b26faebfefd06d2..824e0299278b1b1e594f5c93297b165050e30afe 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -29,24 +29,23 @@
   *       of course).  A non-strict finalfunc can make its own choice of
   *       what to return for a NULL ending transvalue.
   *
- *       When the transvalue datatype is pass-by-reference, we have to be
- *       careful to ensure that the values survive across tuple cycles yet
- *       are not allowed to accumulate until end of query.  We do this by
- *       "ping-ponging" between two memory contexts; successive calls to the
- *       transfunc are executed in alternate contexts, passing the previous
- *       transvalue that is in the other context.      At the beginning of each
- *       tuple cycle we can reset the current output context to avoid memory
- *       usage growth.  Note: we must use MemoryContextContains() to check
- *       whether the transfunc has perhaps handed us back one of its input
- *       values rather than a freshly palloc'd value; if so, we copy the value
- *       to the context we want it in.
+ *       We compute aggregate input expressions and run the transition functions
+ *       in a temporary econtext (aggstate->tmpcontext).  This is reset at
+ *       least once per input tuple, so when the transvalue datatype is
+ *       pass-by-reference, we have to be careful to copy it into a longer-lived
+ *       memory context, and free the prior value to avoid memory leakage.
+ *       We store transvalues in the memory context aggstate->aggcontext,
+ *       which is also used for the hashtable structures in AGG_HASHED mode.
+ *       The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext)
+ *       is used to run finalize functions and compute the output tuple;
+ *       this context can be reset once per output tuple.
   *
   *
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.91 2002/11/06 00:00:43 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.92 2002/11/06 22:31:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -59,6 +58,7 @@
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
  #include "executor/nodeGroup.h"
+#include "executor/nodeHash.h"
  #include "miscadmin.h"
  #include "optimizer/clauses.h"
  #include "parser/parse_coerce.h"
@@ -140,8 +140,27 @@ typedef struct AggStatePerAggData
          */
  
         Tuplesortstate *sortstate;      /* sort object, if a DISTINCT agg */
+} AggStatePerAggData;
  
-       Datum           transValue;
+/*
+ * AggStatePerGroupData - per-aggregate-per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
+ * structs (pointed to by aggstate->pergroup); we re-use the array for
+ * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
+ * hash table contains an array of these structs for each tuple group.
+ *
+ * Logically, the sortstate field belongs in this struct, but we do not
+ * keep it here for space reasons: we don't support DISTINCT aggregates
+ * in AGG_HASHED mode, so there's no reason to use up a pointer field
+ * in every entry of the hashtable.
+ */
+typedef struct AggStatePerGroupData
+{
+       Datum           transValue;             /* current transition value */
         bool            transValueIsNull;
  
         bool            noTransValue;   /* true if transValue not set yet */
@@ -154,97 +173,143 @@ typedef struct AggStatePerAggData
          * later input value. Only the first non-NULL input will be
          * auto-substituted.
          */
-} AggStatePerAggData;
-
+} AggStatePerGroupData;
  
-static void initialize_aggregate(AggStatePerAgg peraggstate);
-static void advance_transition_function(AggStatePerAgg peraggstate,
-                                                       Datum newVal, bool isNull);
-static void advance_aggregates(AggState *aggstate, ExprContext *econtext);
+/*
+ * To implement hashed aggregation, we need a hashtable that stores a
+ * representative tuple and an array of AggStatePerGroup structs for each
+ * distinct set of GROUP BY column values.  We compute the hash key from
+ * the GROUP BY columns.
+ */
+typedef struct AggHashEntryData
+{
+       AggHashEntry    next;           /* next entry in same hash bucket */
+       uint32          hashkey;                /* exact hash key of this entry */
+       HeapTuple       firstTuple;             /* copy of first tuple in this group */
+       /* per-aggregate transition status array - must be last! */
+       AggStatePerGroupData pergroup[1];       /* VARIABLE LENGTH ARRAY */
+} AggHashEntryData;                            /* VARIABLE LENGTH STRUCT */
+
+typedef struct AggHashTableData
+{
+       int                     nbuckets;               /* number of buckets in hash table */
+       AggHashEntry buckets[1];        /* VARIABLE LENGTH ARRAY */
+} AggHashTableData;                            /* VARIABLE LENGTH STRUCT */
+
+
+static void initialize_aggregates(AggState *aggstate,
+                                                                 AggStatePerAgg peragg,
+                                                                 AggStatePerGroup pergroup);
+static void advance_transition_function(AggState *aggstate,
+                                                                               AggStatePerAgg peraggstate,
+                                                                               AggStatePerGroup pergroupstate,
+                                                                               Datum newVal, bool isNull);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
  static void process_sorted_aggregate(AggState *aggstate,
-                                                AggStatePerAgg peraggstate);
-static void finalize_aggregate(AggStatePerAgg peraggstate,
-                                  Datum *resultVal, bool *resultIsNull);
+                                                                        AggStatePerAgg peraggstate,
+                                                                        AggStatePerGroup pergroupstate);
+static void finalize_aggregate(AggState *aggstate,
+                                                          AggStatePerAgg peraggstate,
+                                                          AggStatePerGroup pergroupstate,
+                                                          Datum *resultVal, bool *resultIsNull);
+static void build_hash_table(Agg *node);
+static AggHashEntry lookup_hash_entry(Agg *node, TupleTableSlot *slot);
+static TupleTableSlot *agg_retrieve_direct(Agg *node);
+static void agg_fill_hash_table(Agg *node);
+static TupleTableSlot *agg_retrieve_hash_table(Agg *node);
  static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
  
  
  /*
- * Initialize one aggregate for a new set of input values.
+ * Initialize all aggregates for a new group of input values.
   *
   * When called, CurrentMemoryContext should be the per-query context.
   */
  static void
-initialize_aggregate(AggStatePerAgg peraggstate)
+initialize_aggregates(AggState *aggstate,
+                                         AggStatePerAgg peragg,
+                                         AggStatePerGroup pergroup)
  {
-       Aggref     *aggref = peraggstate->aggref;
+       int                     aggno;
  
-       /*
-        * Start a fresh sort operation for each DISTINCT aggregate.
-        */
-       if (aggref->aggdistinct)
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
         {
+               AggStatePerAgg peraggstate = &peragg[aggno];
+               AggStatePerGroup pergroupstate = &pergroup[aggno];
+               Aggref     *aggref = peraggstate->aggref;
+
                 /*
-                * In case of rescan, maybe there could be an uncompleted sort
-                * operation?  Clean it up if so.
+                * Start a fresh sort operation for each DISTINCT aggregate.
                  */
-               if (peraggstate->sortstate)
-                       tuplesort_end(peraggstate->sortstate);
+               if (aggref->aggdistinct)
+               {
+                       /*
+                        * In case of rescan, maybe there could be an uncompleted sort
+                        * operation?  Clean it up if so.
+                        */
+                       if (peraggstate->sortstate)
+                               tuplesort_end(peraggstate->sortstate);
  
-               peraggstate->sortstate =
-                       tuplesort_begin_datum(peraggstate->inputType,
-                                                                 peraggstate->sortOperator,
-                                                                 false);
-       }
+                       peraggstate->sortstate =
+                               tuplesort_begin_datum(peraggstate->inputType,
+                                                                         peraggstate->sortOperator,
+                                                                         false);
+               }
  
-       /*
-        * (Re)set transValue to the initial value.
-        *
-        * Note that when the initial value is pass-by-ref, we just reuse it
-        * without copying for each group.      Hence, transition function had
-        * better not scribble on its input, or it will fail for GROUP BY!
-        */
-       peraggstate->transValue = peraggstate->initValue;
-       peraggstate->transValueIsNull = peraggstate->initValueIsNull;
+               /*
+                * (Re)set transValue to the initial value.
+                *
+                * Note that when the initial value is pass-by-ref, we must copy it
+                * (into the aggcontext) since we will pfree the transValue later.
+                */
+               if (peraggstate->initValueIsNull)
+                       pergroupstate->transValue = peraggstate->initValue;
+               else
+               {
+                       MemoryContext oldContext;
  
-       /*
-        * If the initial value for the transition state doesn't exist in the
-        * pg_aggregate table then we will let the first non-NULL value
-        * returned from the outer procNode become the initial value. (This is
-        * useful for aggregates like max() and min().)  The noTransValue flag
-        * signals that we still need to do this.
-        */
-       peraggstate->noTransValue = peraggstate->initValueIsNull;
+                       oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
+                       pergroupstate->transValue = datumCopy(peraggstate->initValue,
+                                                                                                 peraggstate->transtypeByVal,
+                                                                                                 peraggstate->transtypeLen);
+                       MemoryContextSwitchTo(oldContext);
+               }
+               pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
+
+               /*
+                * If the initial value for the transition state doesn't exist in the
+                * pg_aggregate table then we will let the first non-NULL value
+                * returned from the outer procNode become the initial value. (This is
+                * useful for aggregates like max() and min().)  The noTransValue flag
+                * signals that we still need to do this.
+                */
+               pergroupstate->noTransValue = peraggstate->initValueIsNull;
+       }
  }
  
  /*
   * Given a new input value, advance the transition function of an aggregate.
   *
- * When called, CurrentMemoryContext should be the context we want the
- * transition function result to be delivered into on this cycle.
+ * It doesn't matter which memory context this is called in.
   */
  static void
-advance_transition_function(AggStatePerAgg peraggstate,
+advance_transition_function(AggState *aggstate,
+                                                       AggStatePerAgg peraggstate,
+                                                       AggStatePerGroup pergroupstate,
                                                         Datum newVal, bool isNull)
  {
         FunctionCallInfoData fcinfo;
+       MemoryContext oldContext;
  
         if (peraggstate->transfn.fn_strict)
         {
+               /*
+                * For a strict transfn, nothing happens at a NULL input
+                * tuple; we just keep the prior transValue.
+                */
                 if (isNull)
-               {
-                       /*
-                        * For a strict transfn, nothing happens at a NULL input
-                        * tuple; we just keep the prior transValue.  However, if the
-                        * transtype is pass-by-ref, we have to copy it into the new
-                        * context because the old one is going to get reset.
-                        */
-                       if (!peraggstate->transValueIsNull)
-                               peraggstate->transValue = datumCopy(peraggstate->transValue,
-                                                                                        peraggstate->transtypeByVal,
-                                                                                         peraggstate->transtypeLen);
                         return;
-               }
-               if (peraggstate->noTransValue)
+               if (pergroupstate->noTransValue)
                 {
                         /*
                          * transValue has not been initialized. This is the first
@@ -253,18 +318,19 @@ advance_transition_function(AggStatePerAgg peraggstate,
                          * is binary-compatible with its transtype, so straight copy
                          * here is OK.)
                          *
-                        * We had better copy the datum if it is pass-by-ref, since the
-                        * given pointer may be pointing into a scan tuple that will
-                        * be freed on the next iteration of the scan.
+                        * We must copy the datum into aggcontext if it is pass-by-ref.
+                        * We do not need to pfree the old transValue, since it's NULL.
                          */
-                       peraggstate->transValue = datumCopy(newVal,
-                                                                                        peraggstate->transtypeByVal,
-                                                                                         peraggstate->transtypeLen);
-                       peraggstate->transValueIsNull = false;
-                       peraggstate->noTransValue = false;
+                       oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
+                       pergroupstate->transValue = datumCopy(newVal,
+                                                                                                 peraggstate->transtypeByVal,
+                                                                                                 peraggstate->transtypeLen);
+                       pergroupstate->transValueIsNull = false;
+                       pergroupstate->noTransValue = false;
+                       MemoryContextSwitchTo(oldContext);
                         return;
                 }
-               if (peraggstate->transValueIsNull)
+               if (pergroupstate->transValueIsNull)
                 {
                         /*
                          * Don't call a strict function with NULL inputs.  Note it is
@@ -277,6 +343,9 @@ advance_transition_function(AggStatePerAgg peraggstate,
                 }
         }
  
+       /* We run the transition functions in per-input-tuple memory context */
+       oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
+
         /*
          * OK to call the transition function
          *
@@ -291,84 +360,76 @@ advance_transition_function(AggStatePerAgg peraggstate,
  
         fcinfo.flinfo = &peraggstate->transfn;
         fcinfo.nargs = 2;
-       fcinfo.arg[0] = peraggstate->transValue;
-       fcinfo.argnull[0] = peraggstate->transValueIsNull;
+       fcinfo.arg[0] = pergroupstate->transValue;
+       fcinfo.argnull[0] = pergroupstate->transValueIsNull;
         fcinfo.arg[1] = newVal;
         fcinfo.argnull[1] = isNull;
  
         newVal = FunctionCallInvoke(&fcinfo);
  
         /*
-        * If the transition function was uncooperative, it may have given us
-        * a pass-by-ref result that points at the scan tuple or the
-        * prior-cycle working memory.  Copy it into the active context if it
-        * doesn't look right.
+        * If pass-by-ref datatype, must copy the new value into aggcontext and
+        * pfree the prior transValue.  But if transfn returned a pointer to its
+        * first input, we don't need to do anything.
          */
-       if (!peraggstate->transtypeByVal && !fcinfo.isnull &&
-               !MemoryContextContains(CurrentMemoryContext,
-                                                          DatumGetPointer(newVal)))
-               newVal = datumCopy(newVal,
-                                                  peraggstate->transtypeByVal,
-                                                  peraggstate->transtypeLen);
+       if (!peraggstate->transtypeByVal &&
+               DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
+       {
+               if (!fcinfo.isnull)
+               {
+                       MemoryContextSwitchTo(aggstate->aggcontext);
+                       newVal = datumCopy(newVal,
+                                                          peraggstate->transtypeByVal,
+                                                          peraggstate->transtypeLen);
+               }
+               if (!pergroupstate->transValueIsNull)
+                       pfree(DatumGetPointer(pergroupstate->transValue));
+       }
+
+       pergroupstate->transValue = newVal;
+       pergroupstate->transValueIsNull = fcinfo.isnull;
  
-       peraggstate->transValue = newVal;
-       peraggstate->transValueIsNull = fcinfo.isnull;
+       MemoryContextSwitchTo(oldContext);
  }
  
  /*
   * Advance all the aggregates for one input tuple.  The input tuple
- * has been stored in econtext->ecxt_scantuple, so that it is accessible
- * to ExecEvalExpr.
+ * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible
+ * to ExecEvalExpr.  pergroup is the array of per-group structs to use
+ * (this might be in a hashtable entry).
   *
   * When called, CurrentMemoryContext should be the per-query context.
   */
  static void
-advance_aggregates(AggState *aggstate, ExprContext *econtext)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  {
-       MemoryContext oldContext;
+       ExprContext *econtext = aggstate->tmpcontext;
         int                     aggno;
  
-       /*
-        * Clear and select the current working context for evaluation
-        * of the input expressions and transition functions at this
-        * input tuple.
-        */
-       econtext->ecxt_per_tuple_memory = aggstate->agg_cxt[aggstate->which_cxt];
-       ResetExprContext(econtext);
-       oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
-
         for (aggno = 0; aggno < aggstate->numaggs; aggno++)
         {
                 AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+               AggStatePerGroup pergroupstate = &pergroup[aggno];
                 Aggref     *aggref = peraggstate->aggref;
                 Datum           newVal;
                 bool            isNull;
  
-               newVal = ExecEvalExpr(aggref->target, econtext, &isNull, NULL);
+               newVal = ExecEvalExprSwitchContext(aggref->target, econtext,
+                                                                                  &isNull, NULL);
  
                 if (aggref->aggdistinct)
                 {
                         /* in DISTINCT mode, we may ignore nulls */
                         if (isNull)
                                 continue;
-                       /* putdatum has to be called in per-query context */
-                       MemoryContextSwitchTo(oldContext);
                         tuplesort_putdatum(peraggstate->sortstate, newVal, isNull);
-                       MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
                 }
                 else
                 {
-                       advance_transition_function(peraggstate, newVal, isNull);
+                       advance_transition_function(aggstate, peraggstate, pergroupstate,
+                                                                               newVal, isNull);
                 }
         }
-
-       /*
-        * Make the other context current so that these transition
-        * results are preserved.
-        */
-       aggstate->which_cxt = 1 - aggstate->which_cxt;
-
-       MemoryContextSwitchTo(oldContext);
  }
  
  /*
@@ -381,10 +442,12 @@ advance_aggregates(AggState *aggstate, ExprContext *econtext)
   */
  static void
  process_sorted_aggregate(AggState *aggstate,
-                                                AggStatePerAgg peraggstate)
+                                                AggStatePerAgg peraggstate,
+                                                AggStatePerGroup pergroupstate)
  {
         Datum           oldVal = (Datum) 0;
         bool            haveOldVal = false;
+       MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
         MemoryContext oldContext;
         Datum           newVal;
         bool            isNull;
@@ -408,12 +471,11 @@ process_sorted_aggregate(AggState *aggstate,
                         continue;
  
                 /*
-                * Clear and select the current working context for evaluation of
+                * Clear and select the working context for evaluation of
                  * the equality function and transition function.
                  */
-               MemoryContextReset(aggstate->agg_cxt[aggstate->which_cxt]);
-               oldContext =
-                       MemoryContextSwitchTo(aggstate->agg_cxt[aggstate->which_cxt]);
+               MemoryContextReset(workcontext);
+               oldContext = MemoryContextSwitchTo(workcontext);
  
                 if (haveOldVal &&
                         DatumGetBool(FunctionCall2(&peraggstate->equalfn,
@@ -422,24 +484,15 @@ process_sorted_aggregate(AggState *aggstate,
                         /* equal to prior, so forget this one */
                         if (!peraggstate->inputtypeByVal)
                                 pfree(DatumGetPointer(newVal));
-
-                       /*
-                        * note we do NOT flip contexts in this case, so no need to
-                        * copy prior transValue to other context.
-                        */
                 }
                 else
                 {
-                       advance_transition_function(peraggstate, newVal, false);
-
-                       /*
-                        * Make the other context current so that this transition
-                        * result is preserved.
-                        */
-                       aggstate->which_cxt = 1 - aggstate->which_cxt;
+                       advance_transition_function(aggstate, peraggstate, pergroupstate,
+                                                                               newVal, false);
                         /* forget the old value, if any */
                         if (haveOldVal && !peraggstate->inputtypeByVal)
                                 pfree(DatumGetPointer(oldVal));
+                       /* and remember the new one for subsequent equality checks */
                         oldVal = newVal;
                         haveOldVal = true;
                 }
@@ -457,13 +510,19 @@ process_sorted_aggregate(AggState *aggstate,
  /*
   * Compute the final value of one aggregate.
   *
- * When called, CurrentMemoryContext should be the context where we want
- * final values delivered (ie, the per-output-tuple expression context).
+ * The finalfunction will be run, and the result delivered, in the
+ * output-tuple context; caller's CurrentMemoryContext does not matter.
   */
  static void
-finalize_aggregate(AggStatePerAgg peraggstate,
+finalize_aggregate(AggState *aggstate,
+                                  AggStatePerAgg peraggstate,
+                                  AggStatePerGroup pergroupstate,
                                    Datum *resultVal, bool *resultIsNull)
  {
+       MemoryContext oldContext;
+
+       oldContext = MemoryContextSwitchTo(aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory);
+
         /*
          * Apply the agg's finalfn if one is provided, else return transValue.
          */
@@ -474,9 +533,9 @@ finalize_aggregate(AggStatePerAgg peraggstate,
                 MemSet(&fcinfo, 0, sizeof(fcinfo));
                 fcinfo.flinfo = &peraggstate->finalfn;
                 fcinfo.nargs = 1;
-               fcinfo.arg[0] = peraggstate->transValue;
-               fcinfo.argnull[0] = peraggstate->transValueIsNull;
-               if (fcinfo.flinfo->fn_strict && peraggstate->transValueIsNull)
+               fcinfo.arg[0] = pergroupstate->transValue;
+               fcinfo.argnull[0] = pergroupstate->transValueIsNull;
+               if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
                 {
                         /* don't call a strict function with NULL inputs */
                         *resultVal = (Datum) 0;
@@ -490,8 +549,8 @@ finalize_aggregate(AggStatePerAgg peraggstate,
         }
         else
         {
-               *resultVal = peraggstate->transValue;
-               *resultIsNull = peraggstate->transValueIsNull;
+               *resultVal = pergroupstate->transValue;
+               *resultIsNull = pergroupstate->transValueIsNull;
         }
  
         /*
@@ -503,8 +562,111 @@ finalize_aggregate(AggStatePerAgg peraggstate,
                 *resultVal = datumCopy(*resultVal,
                                                            peraggstate->resulttypeByVal,
                                                            peraggstate->resulttypeLen);
+
+       MemoryContextSwitchTo(oldContext);
  }
  
+/*
+ * Initialize the hash table to empty.
+ *
+ * The hash table always lives in the aggcontext memory context.
+ */
+static void
+build_hash_table(Agg *node)
+{
+       AggState   *aggstate = node->aggstate;
+       AggHashTable    hashtable;
+       Size                    tabsize;
+
+       Assert(node->aggstrategy == AGG_HASHED);
+       Assert(node->numGroups > 0);
+       tabsize = sizeof(AggHashTableData) +
+               (node->numGroups - 1) * sizeof(AggHashEntry);
+       hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext,
+                                                                                                 tabsize);
+       MemSet(hashtable, 0, tabsize);
+       hashtable->nbuckets = node->numGroups;
+       aggstate->hashtable = hashtable;
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
+ *
+ * When called, CurrentMemoryContext should be the per-query context.
+ */
+static AggHashEntry
+lookup_hash_entry(Agg *node, TupleTableSlot *slot)
+{
+       AggState   *aggstate = node->aggstate;
+       AggHashTable hashtable = aggstate->hashtable;
+       MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
+       HeapTuple       tuple = slot->val;
+       TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
+       uint32          hashkey = 0;
+       int                     i;
+       int                     bucketno;
+       AggHashEntry    entry;
+       MemoryContext oldContext;
+       Size            entrysize;
+
+       /* Need to run the hash function in short-lived context */
+       oldContext = MemoryContextSwitchTo(tmpmem);
+
+       for (i = 0; i < node->numCols; i++)
+       {
+               AttrNumber      att = node->grpColIdx[i];
+               Datum           attr;
+               bool            isNull;
+
+               attr = heap_getattr(tuple, att, tupdesc, &isNull);
+               if (isNull)
+                       continue;                       /* treat nulls as having hash key 0 */
+               hashkey ^= ComputeHashFunc(attr,
+                                                                  (int) tupdesc->attrs[att - 1]->attlen,
+                                                                  tupdesc->attrs[att - 1]->attbyval);
+       }
+       bucketno = hashkey % (uint32) hashtable->nbuckets;
+
+       for (entry = hashtable->buckets[bucketno];
+                entry != NULL;
+                entry = entry->next)
+       {
+               /* Quick check using hashkey */
+               if (entry->hashkey != hashkey)
+                       continue;
+               if (execTuplesMatch(entry->firstTuple,
+                                                       tuple,
+                                                       tupdesc,
+                                                       node->numCols, node->grpColIdx,
+                                                       aggstate->eqfunctions,
+                                                       tmpmem))
+               {
+                       MemoryContextSwitchTo(oldContext);
+                       return entry;
+               }
+       }
+
+       /* Not there, so build a new one */
+       MemoryContextSwitchTo(aggstate->aggcontext);
+       entrysize = sizeof(AggHashEntryData) +
+               (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
+       entry = (AggHashEntry) palloc(entrysize);
+       MemSet(entry, 0, entrysize);
+
+       entry->hashkey = hashkey;
+       entry->firstTuple = heap_copytuple(tuple);
+
+       entry->next = hashtable->buckets[bucketno];
+       hashtable->buckets[bucketno] = entry;
+
+       MemoryContextSwitchTo(oldContext);
+
+       /* initialize aggregates for new tuple group */
+       initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
+
+       return entry;
+}
  
  /*
   * ExecAgg -
@@ -521,16 +683,39 @@ finalize_aggregate(AggStatePerAgg peraggstate,
   */
  TupleTableSlot *
  ExecAgg(Agg *node)
+{
+       AggState   *aggstate = node->aggstate;
+
+       if (aggstate->agg_done)
+               return NULL;
+
+       if (node->aggstrategy == AGG_HASHED)
+       {
+               if (!aggstate->table_filled)
+                       agg_fill_hash_table(node);
+               return agg_retrieve_hash_table(node);
+       }
+       else
+       {
+               return agg_retrieve_direct(node);
+       }
+}
+
+/*
+ * ExecAgg for non-hashed case
+ */
+static TupleTableSlot *
+agg_retrieve_direct(Agg *node)
  {
         AggState   *aggstate;
-       EState     *estate;
         Plan       *outerPlan;
         ExprContext *econtext;
+       ExprContext *tmpcontext;
         ProjectionInfo *projInfo;
         Datum      *aggvalues;
         bool       *aggnulls;
         AggStatePerAgg peragg;
-       MemoryContext oldContext;
+       AggStatePerGroup pergroup;
         TupleTableSlot *outerslot;
         TupleTableSlot *firstSlot;
         TupleTableSlot *resultSlot;
@@ -540,13 +725,16 @@ ExecAgg(Agg *node)
          * get state info from node
          */
         aggstate = node->aggstate;
-       estate = node->plan.state;
         outerPlan = outerPlan(node);
+       /* econtext is the per-output-tuple expression context */
         econtext = aggstate->csstate.cstate.cs_ExprContext;
         aggvalues = econtext->ecxt_aggvalues;
         aggnulls = econtext->ecxt_aggnulls;
+       /* tmpcontext is the per-input-tuple expression context */
+       tmpcontext = aggstate->tmpcontext;
         projInfo = aggstate->csstate.cstate.cs_ProjInfo;
         peragg = aggstate->peragg;
+       pergroup = aggstate->pergroup;
         firstSlot = aggstate->csstate.css_ScanTupleSlot;
  
         /*
@@ -586,17 +774,12 @@ ExecAgg(Agg *node)
                 /*
                  * Clear the per-output-tuple context for each group
                  */
-               MemoryContextReset(aggstate->tup_cxt);
+               ResetExprContext(econtext);
  
                 /*
                  * Initialize working state for a new input tuple group
                  */
-               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
-               {
-                       AggStatePerAgg peraggstate = &peragg[aggno];
-
-                       initialize_aggregate(peraggstate);
-               }
+               initialize_aggregates(aggstate, peragg, pergroup);
  
                 if (aggstate->grp_firstTuple != NULL)
                 {
@@ -612,7 +795,7 @@ ExecAgg(Agg *node)
                         aggstate->grp_firstTuple = NULL; /* don't keep two pointers */
  
                         /* set up for first advance_aggregates call */
-                       econtext->ecxt_scantuple = firstSlot;
+                       tmpcontext->ecxt_scantuple = firstSlot;
  
                         /*
                          * Process each outer-plan tuple, and then fetch the next one,
@@ -620,7 +803,10 @@ ExecAgg(Agg *node)
                          */
                         for (;;)
                         {
-                               advance_aggregates(aggstate, econtext);
+                               advance_aggregates(aggstate, pergroup);
+
+                               /* Reset per-input-tuple context after each tuple */
+                               ResetExprContext(tmpcontext);
  
                                 outerslot = ExecProcNode(outerPlan, (Plan *) node);
                                 if (TupIsNull(outerslot))
@@ -630,7 +816,7 @@ ExecAgg(Agg *node)
                                         break;
                                 }
                                 /* set up for next advance_aggregates call */
-                               econtext->ecxt_scantuple = outerslot;
+                               tmpcontext->ecxt_scantuple = outerslot;
  
                                 /*
                                  * If we are grouping, check whether we've crossed a group
@@ -643,7 +829,7 @@ ExecAgg(Agg *node)
                                                                                  firstSlot->ttc_tupleDescriptor,
                                                                                  node->numCols, node->grpColIdx,
                                                                                  aggstate->eqfunctions,
-                                                                                aggstate->agg_cxt[aggstate->which_cxt]))
+                                                                                tmpcontext->ecxt_per_tuple_memory))
                                         {
                                                 /*
                                                  * Save the first input tuple of the next group.
@@ -658,37 +844,17 @@ ExecAgg(Agg *node)
                 /*
                  * Done scanning input tuple group. Finalize each aggregate
                  * calculation, and stash results in the per-output-tuple context.
-                *
-                * This is a bit tricky when there are both DISTINCT and plain
-                * aggregates: we must first finalize all the plain aggs and then
-                * all the DISTINCT ones.  This is needed because the last
-                * transition values for the plain aggs are stored in the
-                * not-current working context, and we have to evaluate those aggs
-                * (and stash the results in the output tup_cxt!) before we start
-                * flipping contexts again in process_sorted_aggregate.
                  */
-               oldContext = MemoryContextSwitchTo(aggstate->tup_cxt);
-               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
-               {
-                       AggStatePerAgg peraggstate = &peragg[aggno];
-
-                       if (!peraggstate->aggref->aggdistinct)
-                               finalize_aggregate(peraggstate,
-                                                                  &aggvalues[aggno], &aggnulls[aggno]);
-               }
-               MemoryContextSwitchTo(oldContext);
                 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
                 {
                         AggStatePerAgg peraggstate = &peragg[aggno];
+                       AggStatePerGroup pergroupstate = &pergroup[aggno];
  
                         if (peraggstate->aggref->aggdistinct)
-                       {
-                               process_sorted_aggregate(aggstate, peraggstate);
-                               oldContext = MemoryContextSwitchTo(aggstate->tup_cxt);
-                               finalize_aggregate(peraggstate,
-                                                                  &aggvalues[aggno], &aggnulls[aggno]);
-                               MemoryContextSwitchTo(oldContext);
-                       }
+                               process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
+
+                       finalize_aggregate(aggstate, peraggstate, pergroupstate,
+                                                          &aggvalues[aggno], &aggnulls[aggno]);
                 }
  
                 /*
@@ -737,9 +903,158 @@ ExecAgg(Agg *node)
                 }
  
                 /*
-                * Do projection and qual check in the per-output-tuple context.
+                * Form a projection tuple using the aggregate results and the
+                * representative input tuple.  Store it in the result tuple slot.
+                * Note we do not support aggregates returning sets ...
+                */
+               econtext->ecxt_scantuple = firstSlot;
+               resultSlot = ExecProject(projInfo, NULL);
+
+               /*
+                * If the completed tuple does not match the qualifications, it is
+                * ignored and we loop back to try to process another group.
+                * Otherwise, return the tuple.
+                */
+       }
+       while (!ExecQual(node->plan.qual, econtext, false));
+
+       return resultSlot;
+}
+
+/*
+ * ExecAgg for hashed case: phase 1, read input and build hash table
+ */
+static void
+agg_fill_hash_table(Agg *node)
+{
+       AggState   *aggstate;
+       Plan       *outerPlan;
+       ExprContext *tmpcontext;
+       AggHashEntry    entry;
+       TupleTableSlot *outerslot;
+
+       /*
+        * get state info from node
+        */
+       aggstate = node->aggstate;
+       outerPlan = outerPlan(node);
+       /* tmpcontext is the per-input-tuple expression context */
+       tmpcontext = aggstate->tmpcontext;
+
+       /*
+        * Process each outer-plan tuple, and then fetch the next one,
+        * until we exhaust the outer plan.
+        */
+       for (;;)
+       {
+               outerslot = ExecProcNode(outerPlan, (Plan *) node);
+               if (TupIsNull(outerslot))
+                       break;
+               /* set up for advance_aggregates call */
+               tmpcontext->ecxt_scantuple = outerslot;
+
+               /* Find or build hashtable entry for this tuple's group */
+               entry = lookup_hash_entry(node, outerslot);
+
+               /* Advance the aggregates */
+               advance_aggregates(aggstate, entry->pergroup);
+
+               /* Reset per-input-tuple context after each tuple */
+               ResetExprContext(tmpcontext);
+       }
+
+       aggstate->table_filled = true;
+       /* Initialize to walk the hash table */
+       aggstate->next_hash_entry = NULL;
+       aggstate->next_hash_bucket = 0;
+}
+
+/*
+ * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+agg_retrieve_hash_table(Agg *node)
+{
+       AggState   *aggstate;
+       ExprContext *econtext;
+       ProjectionInfo *projInfo;
+       Datum      *aggvalues;
+       bool       *aggnulls;
+       AggStatePerAgg peragg;
+       AggStatePerGroup pergroup;
+       AggHashTable    hashtable;
+       AggHashEntry    entry;
+       TupleTableSlot *firstSlot;
+       TupleTableSlot *resultSlot;
+       int                     aggno;
+
+       /*
+        * get state info from node
+        */
+       aggstate = node->aggstate;
+       /* econtext is the per-output-tuple expression context */
+       econtext = aggstate->csstate.cstate.cs_ExprContext;
+       aggvalues = econtext->ecxt_aggvalues;
+       aggnulls = econtext->ecxt_aggnulls;
+       projInfo = aggstate->csstate.cstate.cs_ProjInfo;
+       peragg = aggstate->peragg;
+       hashtable = aggstate->hashtable;
+       firstSlot = aggstate->csstate.css_ScanTupleSlot;
+
+       /*
+        * We loop retrieving groups until we find one matching
+        * node->plan.qual
+        */
+       do
+       {
+               if (aggstate->agg_done)
+                       return NULL;
+
+               /*
+                * Find the next entry in the hash table
+                */
+               entry = aggstate->next_hash_entry;
+               while (entry == NULL)
+               {
+                       if (aggstate->next_hash_bucket >= hashtable->nbuckets)
+                       {
+                               /* No more entries in hashtable, so done */
+                               aggstate->agg_done = TRUE;
+                               return NULL;
+                       }
+                       entry = hashtable->buckets[aggstate->next_hash_bucket++];
+               }
+               aggstate->next_hash_entry = entry->next;
+
+               /*
+                * Clear the per-output-tuple context for each group
+                */
+               ResetExprContext(econtext);
+
+               /*
+                * Store the copied first input tuple in the tuple table slot
+                * reserved for it, so that it can be used in ExecProject.
                  */
-               econtext->ecxt_per_tuple_memory = aggstate->tup_cxt;
+               ExecStoreTuple(entry->firstTuple,
+                                          firstSlot,
+                                          InvalidBuffer,
+                                          false);
+
+               pergroup = entry->pergroup;
+
+               /*
+                * Finalize each aggregate calculation, and stash results in the
+                * per-output-tuple context.
+                */
+               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+               {
+                       AggStatePerAgg peraggstate = &peragg[aggno];
+                       AggStatePerGroup pergroupstate = &pergroup[aggno];
+
+                       Assert(!peraggstate->aggref->aggdistinct);
+                       finalize_aggregate(aggstate, peraggstate, pergroupstate,
+                                                          &aggvalues[aggno], &aggnulls[aggno]);
+               }
  
                 /*
                  * Form a projection tuple using the aggregate results and the
@@ -789,8 +1104,11 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         aggstate = makeNode(AggState);
         node->aggstate = aggstate;
         aggstate->eqfunctions = NULL;
-       aggstate->grp_firstTuple = NULL;
+       aggstate->peragg = NULL;
         aggstate->agg_done = false;
+       aggstate->pergroup = NULL;
+       aggstate->grp_firstTuple = NULL;
+       aggstate->hashtable = NULL;
  
         /*
          * find aggregates in targetlist and quals
@@ -817,33 +1135,27 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         }
  
         /*
-        * Create expression context
+        * Create expression contexts.  We need two, one for per-input-tuple
+        * processing and one for per-output-tuple processing.  We cheat a little
+        * by using ExecAssignExprContext() to build both.
          */
         ExecAssignExprContext(estate, &aggstate->csstate.cstate);
+       aggstate->tmpcontext = aggstate->csstate.cstate.cs_ExprContext;
+       ExecAssignExprContext(estate, &aggstate->csstate.cstate);
  
         /*
-        * We actually need three separate expression memory contexts: one for
-        * calculating per-output-tuple values (ie, the finished aggregate
-        * results), and two that we ping-pong between for per-input-tuple
-        * evaluation of input expressions and transition functions.  The
-        * context made by ExecAssignExprContext() is used as the output
-        * context.
+        * We also need a long-lived memory context for holding hashtable
+        * data structures and transition values.  NOTE: the details of what
+        * is stored in aggcontext and what is stored in the regular per-query
+        * memory context are driven by a simple decision: we want to reset the
+        * aggcontext in ExecReScanAgg to recover no-longer-wanted space.
          */
-       aggstate->tup_cxt =
-               aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory;
-       aggstate->agg_cxt[0] =
+       aggstate->aggcontext =
                 AllocSetContextCreate(CurrentMemoryContext,
-                                                         "AggExprContext1",
+                                                         "AggContext",
                                                           ALLOCSET_DEFAULT_MINSIZE,
                                                           ALLOCSET_DEFAULT_INITSIZE,
                                                           ALLOCSET_DEFAULT_MAXSIZE);
-       aggstate->agg_cxt[1] =
-               AllocSetContextCreate(CurrentMemoryContext,
-                                                         "AggExprContext2",
-                                                         ALLOCSET_DEFAULT_MINSIZE,
-                                                         ALLOCSET_DEFAULT_INITSIZE,
-                                                         ALLOCSET_DEFAULT_MAXSIZE);
-       aggstate->which_cxt = 0;
  
  #define AGG_NSLOTS 2
  
@@ -854,7 +1166,7 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         ExecInitResultTupleSlot(estate, &aggstate->csstate.cstate);
  
         /*
-        * Set up aggregate-result storage in the expr context, and also
+        * Set up aggregate-result storage in the output expr context, and also
          * allocate my private per-agg working storage
          */
         econtext = aggstate->csstate.cstate.cs_ExprContext;
@@ -867,6 +1179,20 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         MemSet(peragg, 0, sizeof(AggStatePerAggData) * numaggs);
         aggstate->peragg = peragg;
  
+       if (node->aggstrategy == AGG_HASHED)
+       {
+               build_hash_table(node);
+               aggstate->table_filled = false;
+       }
+       else
+       {
+               AggStatePerGroup pergroup;
+
+               pergroup = (AggStatePerGroup) palloc(sizeof(AggStatePerGroupData) * numaggs);
+               MemSet(pergroup, 0, sizeof(AggStatePerGroupData) * numaggs);
+               aggstate->pergroup = pergroup;
+       }
+
         /*
          * initialize child nodes
          */
@@ -984,12 +1310,15 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
                 {
                         /*
                          * Note: use the type from the input expression here, not from
-                        * pg_proc.proargtypes, because the latter might be 0.
+                        * pg_proc.proargtypes, because the latter might be a pseudotype.
                          * (Consider COUNT(*).)
                          */
                         Oid                     inputType = exprType(aggref->target);
                         Oid                     eq_function;
  
+                       /* We don't implement DISTINCT aggs in the HASHED case */
+                       Assert(node->aggstrategy != AGG_HASHED);
+
                         peraggstate->inputType = inputType;
                         get_typlenbyval(inputType,
                                                         &peraggstate->inputtypeLen,
@@ -1055,21 +1384,27 @@ ExecEndAgg(Agg *node)
  {
         AggState   *aggstate = node->aggstate;
         Plan       *outerPlan;
+       int                     aggno;
+
+       /* Make sure we have closed any open tuplesorts */
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+       {
+               AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+
+               if (peraggstate->sortstate)
+                       tuplesort_end(peraggstate->sortstate);
+       }
  
         ExecFreeProjectionInfo(&aggstate->csstate.cstate);
  
         /*
-        * Make sure ExecFreeExprContext() frees the right expr context...
+        * Free both the expr contexts.
          */
-       aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory =
-               aggstate->tup_cxt;
+       ExecFreeExprContext(&aggstate->csstate.cstate);
+       aggstate->csstate.cstate.cs_ExprContext = aggstate->tmpcontext;
         ExecFreeExprContext(&aggstate->csstate.cstate);
  
-       /*
-        * ... and I free the others.
-        */
-       MemoryContextDelete(aggstate->agg_cxt[0]);
-       MemoryContextDelete(aggstate->agg_cxt[1]);
+       MemoryContextDelete(aggstate->aggcontext);
  
         outerPlan = outerPlan(node);
         ExecEndNode(outerPlan, (Plan *) node);
@@ -1088,6 +1423,17 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
  {
         AggState   *aggstate = node->aggstate;
         ExprContext *econtext = aggstate->csstate.cstate.cs_ExprContext;
+       int                     aggno;
+
+       /* Make sure we have closed any open tuplesorts */
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+       {
+               AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+
+               if (peraggstate->sortstate)
+                       tuplesort_end(peraggstate->sortstate);
+               peraggstate->sortstate = NULL;
+       }
  
         aggstate->agg_done = false;
         if (aggstate->grp_firstTuple != NULL)
@@ -1098,6 +1444,14 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
         MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * aggstate->numaggs);
         MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * aggstate->numaggs);
  
+       MemoryContextReset(aggstate->aggcontext);
+
+       if (node->aggstrategy == AGG_HASHED)
+       {
+               build_hash_table(node);
+               aggstate->table_filled = false;
+       }
+
         /*
          * if chgParam of subnode is not null then plan will be re-scanned by
          * first ExecProcNode.
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c

index 662c3d4798ca5c2a9b4b468b3246734feccefdb4..3ea0e44d286b2598cee7813c22bc98d3e3bbffad 100644 (file)
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -15,7 +15,7 @@
   *       locate group boundaries.
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent)
          */
         grpstate = makeNode(GroupState);
         node->grpstate = grpstate;
-       grpstate->grp_useFirstTuple = FALSE;
-       grpstate->grp_done = FALSE;
         grpstate->grp_firstTuple = NULL;
+       grpstate->grp_done = FALSE;
  
         /*
          * create expression context
@@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
  {
         GroupState *grpstate = node->grpstate;
  
-       grpstate->grp_useFirstTuple = FALSE;
         grpstate->grp_done = FALSE;
         if (grpstate->grp_firstTuple != NULL)
         {
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index 8bb5bde84c07a8afb96fbb5aebfd3f4eaa8834c2..57faf0622cbd5e122966c10f78ced32e24f29b19 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,8 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
- *     $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
+ * IDENTIFICATION
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -31,8 +32,6 @@
  #include "utils/lsyscache.h"
  
  
-static uint32 hashFunc(Datum key, int typLen, bool byVal);
-
  /* ----------------------------------------------------------------
   *             ExecHash
   *
@@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
  
         /*
          * We reset the eval context each time to reclaim any memory leaked in
-        * the hashkey expression or hashFunc itself.
+        * the hashkey expression or ComputeHashFunc itself.
          */
         ResetExprContext(econtext);
  
@@ -550,9 +549,9 @@ ExecHashGetBucket(HashJoinTable hashtable,
                 bucketno = 0;
         else
         {
-               bucketno = hashFunc(keyval,
-                                                       (int) hashtable->typLen,
-                                                       hashtable->typByVal)
+               bucketno = ComputeHashFunc(keyval,
+                                                                  (int) hashtable->typLen,
+                                                                  hashtable->typByVal)
                         % (uint32) hashtable->totalbuckets;
         }
  
@@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate,
  }
  
  /* ----------------------------------------------------------------
- *             hashFunc
+ *             ComputeHashFunc
   *
- *             the hash function for hash joins
+ *             the hash function for hash joins (also used for hash aggregation)
   *
   *             XXX this probably ought to be replaced with datatype-specific
   *             hash functions, such as those already implemented for hash indexes.
   * ----------------------------------------------------------------
   */
-static uint32
-hashFunc(Datum key, int typLen, bool byVal)
+uint32
+ComputeHashFunc(Datum key, int typLen, bool byVal)
  {
         unsigned char *k;
  
@@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal)
                 }
                 else
                 {
-                       elog(ERROR, "hashFunc: Invalid typLen %d", typLen);
+                       elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
                         k = NULL;                       /* keep compiler quiet */
                 }
         }
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 0438e0ce6096d5d4060846d3c454fe3f48eb49cc..447d560064300d4f28d3b4513cb81b6bc1e16cbb 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -524,6 +524,7 @@ _copyAgg(Agg *from)
                 memcpy(newnode->grpColIdx, from->grpColIdx,
                            from->numCols * sizeof(AttrNumber));
         }
+       newnode->numGroups = from->numGroups;
  
         return newnode;
  }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 2d6db222b29f7fbed753907d71d080738b504e0f..b35763f23da634ba154c862ecf5c87b1b1b80395 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -5,7 +5,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $
   *
   * NOTES
   *       Every (plan) node in POSTGRES has an associated "out" routine which
@@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node)
  {
         appendStringInfo(str, " AGG ");
         _outPlanInfo(str, (Plan *) node);
-       appendStringInfo(str, " :aggstrategy %d :numCols %d ",
-                                        (int) node->aggstrategy, node->numCols);
+       appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ",
+                                        (int) node->aggstrategy, node->numCols, node->numGroups);
  }
  
  static void
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 5a2acbd2763b7f2210130adbf942e96a764d4465..cba1b2027d34b4c9351b875126cfc15adc8cbba6 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
                 plan->plan_rows *= 0.1;
                 if (plan->plan_rows < 1)
                         plan->plan_rows = 1;
+               node->numGroups = (long) plan->plan_rows;
         }
  
         plan->state = (EState *) NULL;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index cc8e7a698d54b7716f46465a8791a1f7f071c29c..7e722d6a099fd61205136ab45eeff863f2831556 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction)
                 AttrNumber *groupColIdx = NULL;
                 Path       *cheapest_path;
                 Path       *sorted_path;
+               bool            use_hashed_grouping = false;
  
                 /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
                 tlist = preprocess_targetlist(tlist,
@@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction)
                 group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
                 sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
  
+               /*
+                * Consider whether we might want to use hashed grouping.
+                */
+               if (parse->groupClause)
+               {
+                       /*
+                        * Executor doesn't support hashed aggregation with DISTINCT
+                        * aggregates.  (Doing so would imply storing *all* the input
+                        * values in the hash table, which seems like a certain loser.)
+                        */
+                       if (parse->hasAggs &&
+                               (contain_distinct_agg_clause((Node *) tlist) ||
+                                contain_distinct_agg_clause(parse->havingQual)))
+                               use_hashed_grouping = false;
+                       else
+                       {
+#if 0                                                  /* much more to do here */
+                               /* TEMPORARY HOTWIRE FOR TESTING */
+                               use_hashed_grouping = true;
+#endif
+                       }
+               }
+
                 /*
                  * Select the best path and create a plan to execute it.
                  *
@@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction)
                 }
  
                 /*
-                * If any aggregate is present, insert the Agg node, plus an explicit
-                * sort if necessary.
+                * Insert AGG or GROUP node if needed, plus an explicit sort step
+                * if necessary.
                  *
                  * HAVING clause, if any, becomes qual of the Agg node
                  */
-               if (parse->hasAggs)
+               if (use_hashed_grouping)
                 {
+                       /* Hashed aggregate plan --- no sort needed */
+                       result_plan = (Plan *) make_agg(tlist,
+                                                                                       (List *) parse->havingQual,
+                                                                                       AGG_HASHED,
+                                                                                       length(parse->groupClause),
+                                                                                       groupColIdx,
+                                                                                       result_plan);
+                       /* Hashed aggregation produces randomly-ordered results */
+                       current_pathkeys = NIL;
+               }
+               else if (parse->hasAggs)
+               {
+                       /* Plain aggregate plan --- sort if needed */
                         AggStrategy aggstrategy;
  
                         if (parse->groupClause)
                         {
-                               aggstrategy = AGG_SORTED;
-                               /*
-                                * Add an explicit sort if we couldn't make the path come out
-                                * the way the AGG node needs it.
-                                */
                                 if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
                                 {
                                         result_plan = make_groupsortplan(parse,
@@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction)
                                                                                                          result_plan);
                                         current_pathkeys = group_pathkeys;
                                 }
+                               aggstrategy = AGG_SORTED;
+                               /*
+                                * The AGG node will not change the sort ordering of its
+                                * groups, so current_pathkeys describes the result too.
+                                */
                         }
                         else
+                       {
                                 aggstrategy = AGG_PLAIN;
+                               /* Result will be only one row anyway; no sort order */
+                               current_pathkeys = NIL;
+                       }
  
                         result_plan = (Plan *) make_agg(tlist,
                                                                                         (List *) parse->havingQual,
@@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction)
                                                                                         length(parse->groupClause),
                                                                                         groupColIdx,
                                                                                         result_plan);
-                       /*
-                        * Note: plain or grouped Agg does not affect any existing
-                        * sort order of the tuples
-                        */
                 }
                 else
                 {
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index ee037974769a65de0c7707b1ef282ce2421a55c6..f55c988bfc5d4a32d8fc85c1cce7fdfb8b8de712 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $
   *
   * HISTORY
   *       AUTHOR                        DATE                    MAJOR EVENT
@@ -46,6 +46,7 @@ typedef struct
  } check_subplans_for_ungrouped_vars_context;
  
  static bool contain_agg_clause_walker(Node *node, void *context);
+static bool contain_distinct_agg_clause_walker(Node *node, void *context);
  static bool pull_agg_clause_walker(Node *node, List **listptr);
  static bool expression_returns_set_walker(Node *node, void *context);
  static bool contain_subplans_walker(Node *node, void *context);
@@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context)
         return expression_tree_walker(node, contain_agg_clause_walker, context);
  }
  
+/*
+ * contain_distinct_agg_clause
+ *       Recursively search for DISTINCT Aggref nodes within a clause.
+ *
+ *       Returns true if any DISTINCT aggregate found.
+ */
+bool
+contain_distinct_agg_clause(Node *clause)
+{
+       return contain_distinct_agg_clause_walker(clause, NULL);
+}
+
+static bool
+contain_distinct_agg_clause_walker(Node *node, void *context)
+{
+       if (node == NULL)
+               return false;
+       if (IsA(node, Aggref))
+       {
+               if (((Aggref *) node)->aggdistinct)
+                       return true;            /* abort the tree traversal and return
+                                                                * true */
+       }
+       return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
+}
+
  /*
   * pull_agg_clause
   *       Recursively pulls all Aggref nodes from an expression tree.
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index aed6bb0cf6a22ef9ea0a3c23092d1a40511eb060..8bea51e8af05ee635c97103d81af14a94614dab0 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeHash.h,v 1.24 2002/06/20 20:29:49 momjian Exp $
+ * $Id: nodeHash.h,v 1.25 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
                                                 int *virtualbuckets,
                                                 int *physicalbuckets,
                                                 int *numbatches);
+extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
  
  #endif   /* NODEHASH_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 533d296186a8ecd50f9dc716e8a6cabd2d4a8af9..f62d1cb8159f06a649b609765666ce85d99742bd 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.76 2002/11/06 00:00:44 tgl Exp $
+ * $Id: execnodes.h,v 1.77 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -661,12 +661,18 @@ typedef struct MaterialState
   *
   *     csstate.css_ScanTupleSlot refers to output of underlying plan.
   *
- *     Note: the associated ExprContext contains ecxt_aggvalues and ecxt_aggnulls
- *     arrays, which hold the computed agg values for the current input group
- *     during evaluation of an Agg node's output tuple(s).
+ *     Note: csstate.cstate.cs_ExprContext contains ecxt_aggvalues and
+ *     ecxt_aggnulls arrays, which hold the computed agg values for the current
+ *     input group during evaluation of an Agg node's output tuple(s).  We
+ *     create a second ExprContext, tmpcontext, in which to evaluate input
+ *     expressions and run the aggregate transition functions.
   * -------------------------
   */
-typedef struct AggStatePerAggData *AggStatePerAgg;             /* private in nodeAgg.c */
+/* these structs are private in nodeAgg.c: */
+typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerGroupData *AggStatePerGroup;
+typedef struct AggHashEntryData *AggHashEntry;
+typedef struct AggHashTableData *AggHashTable;
  
  typedef struct AggState
  {
@@ -674,13 +680,18 @@ typedef struct AggState
         List       *aggs;                       /* all Aggref nodes in targetlist & quals */
         int                     numaggs;                /* length of list (could be zero!) */
         FmgrInfo   *eqfunctions;        /* per-grouping-field equality fns */
-       HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
-       AggStatePerAgg peragg;          /* per-Aggref working state */
-       MemoryContext tup_cxt;          /* context for per-output-tuple
-                                                                * expressions */
-       MemoryContext agg_cxt[2];       /* pair of expression eval memory contexts */
-       int                     which_cxt;              /* 0 or 1, indicates current agg_cxt */
+       AggStatePerAgg peragg;          /* per-Aggref information */
+       MemoryContext aggcontext;       /* memory context for long-lived data */
+       ExprContext *tmpcontext;        /* econtext for input expressions */
         bool            agg_done;               /* indicates completion of Agg scan */
+       /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
+       AggStatePerGroup pergroup;      /* per-Aggref-per-group working state */
+       HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
+       /* these fields are used in AGG_HASHED mode: */
+       AggHashTable hashtable;         /* hash table with one entry per group */
+       bool            table_filled;   /* hash table filled yet? */
+       AggHashEntry next_hash_entry; /* next entry in current chain */
+       int                     next_hash_bucket; /* next chain */
  } AggState;
  
  /* ---------------------
@@ -691,9 +702,8 @@ typedef struct GroupState
  {
         CommonScanState csstate;        /* its first field is NodeTag */
         FmgrInfo   *eqfunctions;        /* per-field lookup data for equality fns */
-       bool            grp_useFirstTuple;              /* first tuple not processed yet */
-       bool            grp_done;
         HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
+       bool            grp_done;               /* indicates completion of Group scan */
  } GroupState;
  
  /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index 63c8f20d807de76728f5ee975131364a86bc5f92..0cf9d0bac913defb16e190a76e59d0d4e4003b76 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: plannodes.h,v 1.59 2002/11/06 00:00:44 tgl Exp $
+ * $Id: plannodes.h,v 1.60 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -349,6 +349,7 @@ typedef struct Agg
         AggStrategy     aggstrategy;
         int                     numCols;                /* number of grouping columns */
         AttrNumber *grpColIdx;          /* their indexes in the target list */
+       long            numGroups;              /* estimated number of groups in input */
         AggState   *aggstate;
  } Agg;
  
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h

index 844e7d949023bebad96230a32ffb13597d73b988..1cf8fbaf839df09946d8a615cc0a10c4d1f5c87c 100644 (file)
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: clauses.h,v 1.54 2002/09/11 14:48:55 tgl Exp $
+ * $Id: clauses.h,v 1.55 2002/11/06 22:31:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -40,6 +40,7 @@ extern Expr *make_ands_explicit(List *andclauses);
  extern List *make_ands_implicit(Expr *clause);
  
  extern bool contain_agg_clause(Node *clause);
+extern bool contain_distinct_agg_clause(Node *clause);
  extern List *pull_agg_clause(Node *clause);
  
  extern bool expression_returns_set(Node *clause);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 6 Nov 2002 22:31:24 +0000 (22:31 +0000)
src/backend/executor/nodeAgg.c		patch \| blob \| blame \| history
src/backend/executor/nodeGroup.c		patch \| blob \| blame \| history
src/backend/executor/nodeHash.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/util/clauses.c		patch \| blob \| blame \| history
src/include/executor/nodeHash.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/include/nodes/plannodes.h		patch \| blob \| blame \| history
src/include/optimizer/clauses.h		patch \| blob \| blame \| history