Create a new file executor/execGrouping.c to centralize utility routines

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile

index b875259bc1a6c294301ad982acfa0e8c4ce46f49..7e3f5d2d2c83545b6c2195b91e9934ab6dac772c 100644 (file)
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -4,7 +4,7 @@
  #    Makefile for executor
  #
  # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.19 2002/05/12 23:43:02 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.20 2003/01/10 23:54:24 tgl Exp $
  #
  #-------------------------------------------------------------------------
  
@@ -12,7 +12,7 @@ subdir = src/backend/executor
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
-OBJS = execAmi.o execJunk.o execMain.o \
+OBJS = execAmi.o execGrouping.o execJunk.o execMain.o \
         execProcnode.o execQual.o execScan.o execTuples.o \
         execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o nodeHash.o \
         nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

new file mode 100644 (file)

index 0000000..e3f7720
--- /dev/null
+++ b/src/backend/executor/execGrouping.c
@@ -0,0 +1,369 @@
+/*-------------------------------------------------------------------------
+ *
+ * execGrouping.c
+ *       executor utility routines for grouping, hashing, and aggregation
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "executor/executor.h"
+#include "parser/parse_oper.h"
+#include "utils/memutils.h"
+
+
+/*****************************************************************************
+ *             Utility routines for grouping tuples together
+ *
+ * These routines actually implement SQL's notion of "distinct/not distinct".
+ * Two tuples match if they are not distinct in all the compared columns,
+ * i.e., the column values are either both null, or both non-null and equal.
+ *****************************************************************************/
+
+/*
+ * execTuplesMatch
+ *             Return true if two tuples match in all the indicated fields.
+ *             This is used to detect group boundaries in nodeGroup and nodeAgg,
+ *             and to decide whether two tuples are distinct or not in nodeUnique.
+ *
+ * tuple1, tuple2: the tuples to compare
+ * tupdesc: tuple descriptor applying to both tuples
+ * numCols: the number of attributes to be examined
+ * matchColIdx: array of numCols attribute numbers identifying the columns to compare
+ * eqfunctions: array of fmgr lookup info for the equality functions to use, one per matchColIdx entry
+ * evalContext: short-term memory context for executing the functions
+ *
+ * NB: evalContext is reset at the start of every call, before any comparison is made!
+ */
+bool
+execTuplesMatch(HeapTuple tuple1,
+                               HeapTuple tuple2,
+                               TupleDesc tupdesc,
+                               int numCols,
+                               AttrNumber *matchColIdx,
+                               FmgrInfo *eqfunctions,
+                               MemoryContext evalContext)
+{
+       MemoryContext oldContext;
+       bool            result;
+       int                     i;
+
+       /* Reset and switch into the temp context. */
+       MemoryContextReset(evalContext);
+       oldContext = MemoryContextSwitchTo(evalContext);
+
+       /*
+        * We cannot report a match without checking all the fields, but we
+        * can report a non-match as soon as we find unequal fields.  So,
+        * start comparing at the last field (least significant sort key).
+        * That's the most likely to be different if we are dealing with
+        * sorted input.
+        */
+       result = true;
+
+       for (i = numCols; --i >= 0;)
+       {
+               AttrNumber      att = matchColIdx[i];
+               Datum           attr1,
+                                       attr2;
+               bool            isNull1,
+                                       isNull2;
+
+               attr1 = heap_getattr(tuple1,
+                                                        att,
+                                                        tupdesc,
+                                                        &isNull1);
+
+               attr2 = heap_getattr(tuple2,
+                                                        att,
+                                                        tupdesc,
+                                                        &isNull2);
+
+               if (isNull1 != isNull2)
+               {
+                       result = false;         /* one null and one not; they aren't equal */
+                       break;
+               }
+
+               if (isNull1)
+                       continue;                       /* both are null, treat as equal */
+
+               /* Apply the type-specific equality function for column i */
+
+               if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+                                                                               attr1, attr2)))
+               {
+                       result = false;         /* they aren't equal */
+                       break;
+               }
+       }
+
+       MemoryContextSwitchTo(oldContext);
+
+       return result;
+}
+
+
+/*
+ * execTuplesMatchPrepare
+ *             Look up the equality functions needed for execTuplesMatch.
+ *             The result is a palloc'd array of numCols entries, parallel to matchColIdx.
+ */
+FmgrInfo *
+execTuplesMatchPrepare(TupleDesc tupdesc,
+                                          int numCols,
+                                          AttrNumber *matchColIdx)
+{
+       FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+       int                     i;
+
+       for (i = 0; i < numCols; i++)
+       {
+               AttrNumber      att = matchColIdx[i];
+               Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
+               Oid                     eq_function;
+
+               eq_function = equality_oper_funcid(typid);
+               fmgr_info(eq_function, &eqfunctions[i]);
+       }
+
+       return eqfunctions;
+}
+
+
+/*****************************************************************************
+ *             Utility routines for hashing
+ *****************************************************************************/
+
+/*
+ * ComputeHashFunc
+ *
+ *             the hash function for hash joins (also used for hash aggregation)
+ *
+ *             XXX this probably ought to be replaced with datatype-specific
+ *             hash functions, such as those already implemented for hash indexes.
+ */
+uint32
+ComputeHashFunc(Datum key, int typLen, bool byVal)
+{
+       unsigned char *k;
+
+       if (byVal)
+       {
+               /*
+                * If it's a by-value data type, just hash the whole Datum value.
+                * This assumes that datatypes narrower than Datum are
+                * consistently padded (either zero-extended or sign-extended, but
+                * not random bits) to fill Datum; see the XXXGetDatum macros in
+                * postgres.h. NOTE: it would not work to do hash_any(&key, len)
+                * since this would get the wrong bytes on a big-endian machine.
+                */
+               k = (unsigned char *) &key;
+               typLen = sizeof(Datum);
+       }
+       else
+       {
+               if (typLen > 0)
+               {
+                       /* fixed-width pass-by-reference type */
+                       k = (unsigned char *) DatumGetPointer(key);
+               }
+               else if (typLen == -1)
+               {
+                       /*
+                        * It's a varlena type, so 'key' points to a "struct varlena".
+                        * NOTE: VARSIZE returns the "real" data length plus the
+                        * sizeof the "vl_len" attribute of varlena (the length
+                        * information). 'key' points to the beginning of the varlena
+                        * struct, so we have to use "VARDATA" to find the beginning
+                        * of the "real" data.  Also, we have to be careful to detoast
+                        * the datum if it's toasted.  (We don't worry about freeing
+                        * the detoasted copy; callers are expected to run this in a
+                        * short-lived context, e.g. the one reset in ExecHashGetBucket.)
+                        */
+                       struct varlena *vkey = PG_DETOAST_DATUM(key);
+
+                       typLen = VARSIZE(vkey) - VARHDRSZ;
+                       k = (unsigned char *) VARDATA(vkey);
+               }
+               else if (typLen == -2)
+               {
+                       /* It's a null-terminated C string; the terminator is hashed too */
+                       typLen = strlen(DatumGetCString(key)) + 1;
+                       k = (unsigned char *) DatumGetPointer(key);
+               }
+               else
+               {
+                       elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
+                       k = NULL;                       /* keep compiler quiet */
+               }
+       }
+
+       return DatumGetUInt32(hash_any(k, typLen));
+}
+
+
+/*****************************************************************************
+ *             Utility routines for all-in-memory hash tables
+ *
+ * These routines build hash tables for grouping tuples together (eg, for
+ * hash aggregation).  There is one entry for each not-distinct set of tuples
+ * presented.
+ *****************************************************************************/
+
+/*
+ * Construct an empty TupleHashTable
+ *
+ *     numCols, keyColIdx: identify the tuple fields to use as lookup key
+ *     eqfunctions: equality comparison functions to use
+ *     nbuckets: number of buckets to make (must be greater than zero)
+ *     entrysize: size of each entry (at least sizeof(TupleHashEntryData))
+ *     tablecxt: memory context in which to store table and table entries
+ *     tempcxt: short-lived context for evaluating hash and comparison functions
+ *
+ * The eqfunctions array may be made with execTuplesMatchPrepare().
+ *
+ * Note that keyColIdx and eqfunctions must be allocated in storage that
+ * will live as long as the hashtable does.
+ */
+TupleHashTable
+BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                                       FmgrInfo *eqfunctions,
+                                       int nbuckets, Size entrysize,
+                                       MemoryContext tablecxt, MemoryContext tempcxt)
+{
+       TupleHashTable  hashtable;
+       Size                    tabsize;
+
+       Assert(nbuckets > 0);
+       Assert(entrysize >= sizeof(TupleHashEntryData));
+
+       tabsize = sizeof(TupleHashTableData) +
+               (nbuckets - 1) * sizeof(TupleHashEntry);
+       hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+
+       hashtable->numCols = numCols;
+       hashtable->keyColIdx = keyColIdx;
+       hashtable->eqfunctions = eqfunctions;
+       hashtable->tablecxt = tablecxt;
+       hashtable->tempcxt = tempcxt;
+       hashtable->entrysize = entrysize;
+       hashtable->nbuckets = nbuckets;
+
+       return hashtable;
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * tuple held in the given slot.
+ *
+ * On return, *isnew is true if the entry is newly created, false if it
+ * existed already.  Any extra space in a new entry has been zeroed.
+ */
+TupleHashEntry
+LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+                                        bool *isnew)
+{
+       int                     numCols = hashtable->numCols;
+       AttrNumber *keyColIdx = hashtable->keyColIdx;
+       HeapTuple       tuple = slot->val;
+       TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
+       uint32          hashkey = 0;
+       int                     i;
+       int                     bucketno;
+       TupleHashEntry entry;
+       MemoryContext oldContext;
+
+       /* Need to run the hash function in short-lived context */
+       oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+       for (i = 0; i < numCols; i++)
+       {
+               AttrNumber      att = keyColIdx[i];
+               Datum           attr;
+               bool            isNull;
+
+               /* rotate hashkey left 1 bit at each step */
+               hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+               attr = heap_getattr(tuple, att, tupdesc, &isNull);
+               if (isNull)
+                       continue;                       /* treat nulls as having hash key 0 */
+               hashkey ^= ComputeHashFunc(attr,
+                                                                  (int) tupdesc->attrs[att - 1]->attlen,
+                                                                  tupdesc->attrs[att - 1]->attbyval);
+       }
+       bucketno = hashkey % (uint32) hashtable->nbuckets;
+
+       for (entry = hashtable->buckets[bucketno];
+                entry != NULL;
+                entry = entry->next)
+       {
+               /* Quick check using hashkey, to skip the full column comparison */
+               if (entry->hashkey != hashkey)
+                       continue;
+               if (execTuplesMatch(entry->firstTuple,
+                                                       tuple,
+                                                       tupdesc,
+                                                       numCols, keyColIdx,
+                                                       hashtable->eqfunctions,
+                                                       hashtable->tempcxt))
+               {
+                       MemoryContextSwitchTo(oldContext);
+                       *isnew = false;
+                       return entry;
+               }
+       }
+
+       /* Not there, so build a new one, allocated in the table's own context */
+       MemoryContextSwitchTo(hashtable->tablecxt);
+
+       entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+
+       entry->hashkey = hashkey;
+       entry->firstTuple = heap_copytuple(tuple);
+
+       entry->next = hashtable->buckets[bucketno];
+       hashtable->buckets[bucketno] = entry;
+
+       MemoryContextSwitchTo(oldContext);
+
+       *isnew = true;
+
+       return entry;
+}
+
+/*
+ * Walk through all the entries of a hash table, in no special order;
+ * each call returns the next entry, or NULL when no more entries remain.
+ *
+ * Iterator state must be initialized with ResetTupleHashIterator() before the first call.
+ */
+TupleHashEntry
+ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+{
+       TupleHashEntry  entry;
+
+       entry = state->next_entry;
+       while (entry == NULL)
+       {
+               if (state->next_bucket >= hashtable->nbuckets)
+               {
+                       /* No more entries in hashtable, so done */
+                       return NULL;
+               }
+               entry = hashtable->buckets[state->next_bucket++];
+       }
+       state->next_entry = entry->next;
+
+       return entry;
+}
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 769e88a839779347305416088488ba8e20ce24fa..d8eeae15ad7f5bf7e2f7fdb52f47175accff3458 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.101 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.102 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -57,8 +57,6 @@
  #include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
-#include "executor/nodeGroup.h"
-#include "executor/nodeHash.h"
  #include "miscadmin.h"
  #include "optimizer/clauses.h"
  #include "parser/parse_coerce.h"
@@ -182,21 +180,15 @@ typedef struct AggStatePerGroupData
   * distinct set of GROUP BY column values.  We compute the hash key from
   * the GROUP BY columns.
   */
+typedef struct AggHashEntryData *AggHashEntry;
+
  typedef struct AggHashEntryData
  {
-       AggHashEntry    next;           /* next entry in same hash bucket */
-       uint32          hashkey;                /* exact hash key of this entry */
-       HeapTuple       firstTuple;             /* copy of first tuple in this group */
+       TupleHashEntryData shared;      /* common header for hash table entries */
         /* per-aggregate transition status array - must be last! */
         AggStatePerGroupData pergroup[1];       /* VARIABLE LENGTH ARRAY */
  } AggHashEntryData;                            /* VARIABLE LENGTH STRUCT */
  
-typedef struct AggHashTableData
-{
-       int                     nbuckets;               /* number of buckets in hash table */
-       AggHashEntry buckets[1];        /* VARIABLE LENGTH ARRAY */
-} AggHashTableData;                            /* VARIABLE LENGTH STRUCT */
-
  
  static void initialize_aggregates(AggState *aggstate,
                                                                   AggStatePerAgg peragg,
@@ -578,18 +570,22 @@ static void
  build_hash_table(AggState *aggstate)
  {
         Agg                        *node = (Agg *) aggstate->ss.ps.plan;
-       AggHashTable    hashtable;
-       Size                    tabsize;
+       MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
+       Size                    entrysize;
  
         Assert(node->aggstrategy == AGG_HASHED);
         Assert(node->numGroups > 0);
-       tabsize = sizeof(AggHashTableData) +
-               (node->numGroups - 1) * sizeof(AggHashEntry);
-       hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext,
-                                                                                                 tabsize);
-       MemSet(hashtable, 0, tabsize);
-       hashtable->nbuckets = node->numGroups;
-       aggstate->hashtable = hashtable;
+
+       entrysize = sizeof(AggHashEntryData) +
+               (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
+
+       aggstate->hashtable = BuildTupleHashTable(node->numCols,
+                                                                                         node->grpColIdx,
+                                                                                         aggstate->eqfunctions,
+                                                                                         node->numGroups,
+                                                                                         entrysize,
+                                                                                         aggstate->aggcontext,
+                                                                                         tmpmem);
  }
  
  /*
@@ -601,75 +597,19 @@ build_hash_table(AggState *aggstate)
  static AggHashEntry
  lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
  {
-       Agg                *node = (Agg *) aggstate->ss.ps.plan;
-       AggHashTable hashtable = aggstate->hashtable;
-       MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
-       HeapTuple       tuple = slot->val;
-       TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
-       uint32          hashkey = 0;
-       int                     i;
-       int                     bucketno;
-       AggHashEntry    entry;
-       MemoryContext oldContext;
-       Size            entrysize;
-
-       /* Need to run the hash function in short-lived context */
-       oldContext = MemoryContextSwitchTo(tmpmem);
-
-       for (i = 0; i < node->numCols; i++)
-       {
-               AttrNumber      att = node->grpColIdx[i];
-               Datum           attr;
-               bool            isNull;
+       AggHashEntry entry;
+       bool            isnew;
  
-               /* rotate hashkey left 1 bit at each step */
-               hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+       entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
+                                                                                               slot,
+                                                                                               &isnew);
  
-               attr = heap_getattr(tuple, att, tupdesc, &isNull);
-               if (isNull)
-                       continue;                       /* treat nulls as having hash key 0 */
-               hashkey ^= ComputeHashFunc(attr,
-                                                                  (int) tupdesc->attrs[att - 1]->attlen,
-                                                                  tupdesc->attrs[att - 1]->attbyval);
-       }
-       bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-       for (entry = hashtable->buckets[bucketno];
-                entry != NULL;
-                entry = entry->next)
+       if (isnew)
         {
-               /* Quick check using hashkey */
-               if (entry->hashkey != hashkey)
-                       continue;
-               if (execTuplesMatch(entry->firstTuple,
-                                                       tuple,
-                                                       tupdesc,
-                                                       node->numCols, node->grpColIdx,
-                                                       aggstate->eqfunctions,
-                                                       tmpmem))
-               {
-                       MemoryContextSwitchTo(oldContext);
-                       return entry;
-               }
+               /* initialize aggregates for new tuple group */
+               initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
         }
  
-       /* Not there, so build a new one */
-       MemoryContextSwitchTo(aggstate->aggcontext);
-       entrysize = sizeof(AggHashEntryData) +
-               (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
-       entry = (AggHashEntry) palloc0(entrysize);
-
-       entry->hashkey = hashkey;
-       entry->firstTuple = heap_copytuple(tuple);
-
-       entry->next = hashtable->buckets[bucketno];
-       hashtable->buckets[bucketno] = entry;
-
-       MemoryContextSwitchTo(oldContext);
-
-       /* initialize aggregates for new tuple group */
-       initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
-
         return entry;
  }
  
@@ -964,8 +904,7 @@ agg_fill_hash_table(AggState *aggstate)
  
         aggstate->table_filled = true;
         /* Initialize to walk the hash table */
-       aggstate->next_hash_entry = NULL;
-       aggstate->next_hash_bucket = 0;
+       ResetTupleHashIterator(&aggstate->hashiter);
  }
  
  /*
@@ -980,7 +919,7 @@ agg_retrieve_hash_table(AggState *aggstate)
         bool       *aggnulls;
         AggStatePerAgg peragg;
         AggStatePerGroup pergroup;
-       AggHashTable    hashtable;
+       TupleHashTable  hashtable;
         AggHashEntry    entry;
         TupleTableSlot *firstSlot;
         TupleTableSlot *resultSlot;
@@ -1010,18 +949,14 @@ agg_retrieve_hash_table(AggState *aggstate)
                 /*
                  * Find the next entry in the hash table
                  */
-               entry = aggstate->next_hash_entry;
-               while (entry == NULL)
+               entry = (AggHashEntry) ScanTupleHashTable(hashtable,
+                                                                                                 &aggstate->hashiter);
+               if (entry == NULL)
                 {
-                       if (aggstate->next_hash_bucket >= hashtable->nbuckets)
-                       {
-                               /* No more entries in hashtable, so done */
-                               aggstate->agg_done = TRUE;
-                               return NULL;
-                       }
-                       entry = hashtable->buckets[aggstate->next_hash_bucket++];
+                       /* No more entries in hashtable, so done */
+                       aggstate->agg_done = TRUE;
+                       return NULL;
                 }
-               aggstate->next_hash_entry = entry->next;
  
                 /*
                  * Clear the per-output-tuple context for each group
@@ -1032,7 +967,7 @@ agg_retrieve_hash_table(AggState *aggstate)
                  * Store the copied first input tuple in the tuple table slot
                  * reserved for it, so that it can be used in ExecProject.
                  */
-               ExecStoreTuple(entry->firstTuple,
+               ExecStoreTuple(entry->shared.firstTuple,
                                            firstSlot,
                                            InvalidBuffer,
                                            false);
@@ -1187,6 +1122,17 @@ ExecInitAgg(Agg *node, EState *estate)
                 numaggs = 1;
         }
  
+       /*
+        * If we are grouping, precompute fmgr lookup data for inner loop
+        */
+       if (node->numCols > 0)
+       {
+               aggstate->eqfunctions =
+                       execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
+                                                                  node->numCols,
+                                                                  node->grpColIdx);
+       }
+
         /*
          * Set up aggregate-result storage in the output expr context, and also
          * allocate my private per-agg working storage
@@ -1211,17 +1157,6 @@ ExecInitAgg(Agg *node, EState *estate)
                 aggstate->pergroup = pergroup;
         }
  
-       /*
-        * If we are grouping, precompute fmgr lookup data for inner loop
-        */
-       if (node->numCols > 0)
-       {
-               aggstate->eqfunctions =
-                       execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
-                                                                  node->numCols,
-                                                                  node->grpColIdx);
-       }
-
         /*
          * Perform lookups of aggregate function info, and initialize the
          * unchanging fields of the per-agg data
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c

index 58f6c1b34e906d97f156ecb826ce745b571121a7..b480e388a2119aef3ec0beb7b41306236d92c5c0 100644 (file)
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -15,7 +15,7 @@
   *       locate group boundaries.
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.53 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.54 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,13 +23,8 @@
  #include "postgres.h"
  
  #include "access/heapam.h"
-#include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeGroup.h"
-#include "parser/parse_oper.h"
-#include "utils/builtins.h"
-#include "utils/lsyscache.h"
-#include "utils/syscache.h"
  
  
  /*
@@ -241,116 +236,3 @@ ExecReScanGroup(GroupState *node, ExprContext *exprCtxt)
                 ((PlanState *) node)->lefttree->chgParam == NULL)
                 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
  }
-
-/*****************************************************************************
- *             Code shared with nodeUnique.c and nodeAgg.c
- *****************************************************************************/
-
-/*
- * execTuplesMatch
- *             Return true if two tuples match in all the indicated fields.
- *             This is used to detect group boundaries in nodeGroup and nodeAgg,
- *             and to decide whether two tuples are distinct or not in nodeUnique.
- *
- * tuple1, tuple2: the tuples to compare
- * tupdesc: tuple descriptor applying to both tuples
- * numCols: the number of attributes to be examined
- * matchColIdx: array of attribute column numbers
- * eqFunctions: array of fmgr lookup info for the equality functions to use
- * evalContext: short-term memory context for executing the functions
- *
- * NB: evalContext is reset each time!
- */
-bool
-execTuplesMatch(HeapTuple tuple1,
-                               HeapTuple tuple2,
-                               TupleDesc tupdesc,
-                               int numCols,
-                               AttrNumber *matchColIdx,
-                               FmgrInfo *eqfunctions,
-                               MemoryContext evalContext)
-{
-       MemoryContext oldContext;
-       bool            result;
-       int                     i;
-
-       /* Reset and switch into the temp context. */
-       MemoryContextReset(evalContext);
-       oldContext = MemoryContextSwitchTo(evalContext);
-
-       /*
-        * We cannot report a match without checking all the fields, but we
-        * can report a non-match as soon as we find unequal fields.  So,
-        * start comparing at the last field (least significant sort key).
-        * That's the most likely to be different if we are dealing with
-        * sorted input.
-        */
-       result = true;
-
-       for (i = numCols; --i >= 0;)
-       {
-               AttrNumber      att = matchColIdx[i];
-               Datum           attr1,
-                                       attr2;
-               bool            isNull1,
-                                       isNull2;
-
-               attr1 = heap_getattr(tuple1,
-                                                        att,
-                                                        tupdesc,
-                                                        &isNull1);
-
-               attr2 = heap_getattr(tuple2,
-                                                        att,
-                                                        tupdesc,
-                                                        &isNull2);
-
-               if (isNull1 != isNull2)
-               {
-                       result = false;         /* one null and one not; they aren't equal */
-                       break;
-               }
-
-               if (isNull1)
-                       continue;                       /* both are null, treat as equal */
-
-               /* Apply the type-specific equality function */
-
-               if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
-                                                                               attr1, attr2)))
-               {
-                       result = false;         /* they aren't equal */
-                       break;
-               }
-       }
-
-       MemoryContextSwitchTo(oldContext);
-
-       return result;
-}
-
-/*
- * execTuplesMatchPrepare
- *             Look up the equality functions needed for execTuplesMatch.
- *             The result is a palloc'd array.
- */
-FmgrInfo *
-execTuplesMatchPrepare(TupleDesc tupdesc,
-                                          int numCols,
-                                          AttrNumber *matchColIdx)
-{
-       FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
-       int                     i;
-
-       for (i = 0; i < numCols; i++)
-       {
-               AttrNumber      att = matchColIdx[i];
-               Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
-               Oid                     eq_function;
-
-               eq_function = equality_oper_funcid(typid);
-               fmgr_info(eq_function, &eqfunctions[i]);
-       }
-
-       return eqfunctions;
-}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index bea89630993d372a4bc553a07e001e2aafcebb28..31152a3d8552d770570417c0a3060e16b1caa21d 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.74 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,10 +20,6 @@
   */
  #include "postgres.h"
  
-#include <limits.h>
-#include <math.h>
-
-#include "access/hash.h"
  #include "executor/execdebug.h"
  #include "executor/nodeHash.h"
  #include "executor/nodeHashjoin.h"
@@ -642,74 +638,6 @@ ExecScanHashBucket(HashJoinState *hjstate,
         return NULL;
  }
  
-/* ----------------------------------------------------------------
- *             ComputeHashFunc
- *
- *             the hash function for hash joins (also used for hash aggregation)
- *
- *             XXX this probably ought to be replaced with datatype-specific
- *             hash functions, such as those already implemented for hash indexes.
- * ----------------------------------------------------------------
- */
-uint32
-ComputeHashFunc(Datum key, int typLen, bool byVal)
-{
-       unsigned char *k;
-
-       if (byVal)
-       {
-               /*
-                * If it's a by-value data type, just hash the whole Datum value.
-                * This assumes that datatypes narrower than Datum are
-                * consistently padded (either zero-extended or sign-extended, but
-                * not random bits) to fill Datum; see the XXXGetDatum macros in
-                * postgres.h. NOTE: it would not work to do hash_any(&key, len)
-                * since this would get the wrong bytes on a big-endian machine.
-                */
-               k = (unsigned char *) &key;
-               typLen = sizeof(Datum);
-       }
-       else
-       {
-               if (typLen > 0)
-               {
-                       /* fixed-width pass-by-reference type */
-                       k = (unsigned char *) DatumGetPointer(key);
-               }
-               else if (typLen == -1)
-               {
-                       /*
-                        * It's a varlena type, so 'key' points to a "struct varlena".
-                        * NOTE: VARSIZE returns the "real" data length plus the
-                        * sizeof the "vl_len" attribute of varlena (the length
-                        * information). 'key' points to the beginning of the varlena
-                        * struct, so we have to use "VARDATA" to find the beginning
-                        * of the "real" data.  Also, we have to be careful to detoast
-                        * the datum if it's toasted.  (We don't worry about freeing
-                        * the detoasted copy; that happens for free when the
-                        * per-tuple memory context is reset in ExecHashGetBucket.)
-                        */
-                       struct varlena *vkey = PG_DETOAST_DATUM(key);
-
-                       typLen = VARSIZE(vkey) - VARHDRSZ;
-                       k = (unsigned char *) VARDATA(vkey);
-               }
-               else if (typLen == -2)
-               {
-                       /* It's a null-terminated C string */
-                       typLen = strlen(DatumGetCString(key)) + 1;
-                       k = (unsigned char *) DatumGetPointer(key);
-               }
-               else
-               {
-                       elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
-                       k = NULL;                       /* keep compiler quiet */
-               }
-       }
-
-       return DatumGetUInt32(hash_any(k, typLen));
-}
-
  /* ----------------------------------------------------------------
   *             ExecHashTableReset
   *
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c

index 965a2a6466aff80572eba0189e56853e972b1d21..3946cd00246108f2fa2f85345ce83be732c71323 100644 (file)
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -21,7 +21,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.8 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.9 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,9 +36,9 @@
  
  #include "access/heapam.h"
  #include "executor/executor.h"
-#include "executor/nodeGroup.h"
  #include "executor/nodeSetOp.h"
  
+
  /* ----------------------------------------------------------------
   *             ExecSetOp
   * ----------------------------------------------------------------
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c

index 415594f92c4ddc2a6d6a9a18142a8fba04769bcd..7a0ccb0b14c8ce01b8bc3f77fc8dd5a4f08051ec 100644 (file)
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.36 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.37 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -27,9 +27,9 @@
  
  #include "access/heapam.h"
  #include "executor/executor.h"
-#include "executor/nodeGroup.h"
  #include "executor/nodeUnique.h"
  
+
  /* ----------------------------------------------------------------
   *             ExecUnique
   *
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index 31cc2107536aa7a096ef948ae4bf3e29f186d248..fb300fc044339990234ecccb70e8ccc57dcda5d5 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.85 2002/12/15 21:01:34 tgl Exp $
+ * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,6 +36,31 @@ extern void ExecMarkPos(PlanState *node);
  extern void ExecRestrPos(PlanState *node);
  extern bool ExecSupportsMarkRestore(NodeTag plantype);
  
+/*
+ * prototypes from functions in execGrouping.c
+ */
+extern bool execTuplesMatch(HeapTuple tuple1,
+                               HeapTuple tuple2,
+                               TupleDesc tupdesc,
+                               int numCols,
+                               AttrNumber *matchColIdx,
+                               FmgrInfo *eqfunctions,
+                               MemoryContext evalContext);
+extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
+                                          int numCols,
+                                          AttrNumber *matchColIdx);    /* result was a palloc'd array in the nodeGroup.c version; presumably unchanged */
+extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);      /* nodeHash.c version took typLen > 0, -1 (varlena) or -2 (C string) when !byVal */
+extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                                                                                 FmgrInfo *eqfunctions,
+                                                                                 int nbuckets, Size entrysize,
+                                                                                 MemoryContext tablecxt,
+                                                                                 MemoryContext tempcxt);
+extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
+                                                                                  TupleTableSlot *slot,
+                                                                                  bool *isnew);
+extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
+                                                                                TupleHashIterator *state);
+
  /*
   * prototypes from functions in execJunk.c
   */
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h

index 036d67ccaadf0ba9ef6f5bd8c0f419797204cef2..a2817306da0adc0e8ae20c453e13323139e091a2 100644 (file)
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -1,13 +1,13 @@
  /*-------------------------------------------------------------------------
   *
   * nodeAgg.h
- *
+ *       prototypes for nodeAgg.c
   *
   *
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeAgg.h,v 1.18 2002/12/05 15:50:36 tgl Exp $
+ * $Id: nodeAgg.h,v 1.19 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
diff --git a/src/include/executor/nodeGroup.h b/src/include/executor/nodeGroup.h

index 211e55b6cadfe3a0bb80aef808a0b9a14a5aab6b..2a6b733c9d2a4314856f04f7c9a937b597b6f5c2 100644 (file)
--- a/src/include/executor/nodeGroup.h
+++ b/src/include/executor/nodeGroup.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeGroup.h,v 1.23 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeGroup.h,v 1.24 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,15 +22,4 @@ extern TupleTableSlot *ExecGroup(GroupState *node);
  extern void ExecEndGroup(GroupState *node);
  extern void ExecReScanGroup(GroupState *node, ExprContext *exprCtxt);
  
-extern bool execTuplesMatch(HeapTuple tuple1,
-                               HeapTuple tuple2,
-                               TupleDesc tupdesc,
-                               int numCols,
-                               AttrNumber *matchColIdx,
-                               FmgrInfo *eqfunctions,
-                               MemoryContext evalContext);
-extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
-                                          int numCols,
-                                          AttrNumber *matchColIdx);
-
  #endif   /* NODEGROUP_H */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index 02e56355263b66272a814d93bc2877a976d7f2b2..da1113b32daf13005270432d1c6ad24805b903f9 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -1,13 +1,13 @@
  /*-------------------------------------------------------------------------
   *
   * nodeHash.h
- *
+ *       prototypes for nodeHash.c
   *
   *
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
+ * $Id: nodeHash.h,v 1.29 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -38,6 +38,5 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
                                                 int *virtualbuckets,
                                                 int *physicalbuckets,
                                                 int *numbatches);
-extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
  
  #endif   /* NODEHASH_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 1ce0635c632c7540c28a6d07b6150e11ab647d7c..9c43660c610ede090e2e6d3e9d92b866ff509101 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.89 2003/01/10 21:08:15 tgl Exp $
+ * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -324,6 +324,46 @@ typedef struct EState
  } EState;
  
  
+/* ----------------------------------------------------------------
+ *                              Tuple Hash Tables
+ *
+ * All-in-memory tuple hash tables are used for a number of purposes.
+ * ----------------------------------------------------------------
+ */
+typedef struct TupleHashEntryData *TupleHashEntry;
+typedef struct TupleHashTableData *TupleHashTable;
+
+typedef struct TupleHashEntryData
+{
+       TupleHashEntry next;            /* next entry in same hash bucket (chain link) */
+       uint32          hashkey;                /* exact hash key of this entry */
+       HeapTuple       firstTuple;             /* copy of first tuple in this group */
+       /* additional data may follow this struct; see entrysize in TupleHashTableData */
+} TupleHashEntryData;                  /* VARIABLE LENGTH STRUCT */
+
+typedef struct TupleHashTableData
+{
+       int                     numCols;                /* number of columns in lookup key */
+       AttrNumber *keyColIdx;          /* attr numbers of key columns */
+       FmgrInfo   *eqfunctions;        /* lookup data for comparison functions (presumably one per key column) */
+       MemoryContext tablecxt;         /* memory context containing table */
+       MemoryContext tempcxt;          /* context for function evaluations */
+       Size            entrysize;              /* actual size to make each hash entry (TupleHashEntryData plus any trailing data) */
+       int                     nbuckets;               /* number of buckets in hash table */
+       TupleHashEntry buckets[1];      /* VARIABLE LENGTH ARRAY of bucket chain heads (presumably nbuckets long) */
+} TupleHashTableData;                  /* VARIABLE LENGTH STRUCT */
+
+typedef struct
+{
+       TupleHashEntry next_entry;      /* next entry in current chain */
+       int                     next_bucket;    /* next chain (presumably an index into buckets[]) */
+} TupleHashIterator;                   /* scan state passed to ScanTupleHashTable; initialize with ResetTupleHashIterator */
+
+#define ResetTupleHashIterator(iter) \
+       ((iter)->next_entry = NULL, \
+        (iter)->next_bucket = 0)       /* NB: evaluates iter twice; pass a side-effect-free pointer expression */
+
+
  /* ----------------------------------------------------------------
   *                              Expression State Trees
   *
@@ -445,9 +485,6 @@ typedef struct BoolExprState
   *             SubPlanState node
   * ----------------
   */
-/* this struct is private in nodeSubplan.c: */
-typedef struct SubPlanHashTableData *SubPlanHashTable;
-
  typedef struct SubPlanState
  {
         ExprState       xprstate;
@@ -458,8 +495,8 @@ typedef struct SubPlanState
         bool            needShutdown;   /* TRUE = need to shutdown subplan */
         HeapTuple       curTuple;               /* copy of most recent tuple from subplan */
         /* these are used when hashing the subselect's output: */
-       SubPlanHashTable hashtable;     /* hash table for no-nulls subselect rows */
-       SubPlanHashTable hashnulls;     /* hash table for rows with null(s) */
+       TupleHashTable hashtable;       /* hash table for no-nulls subselect rows */
+       TupleHashTable hashnulls;       /* hash table for rows with null(s) */
  } SubPlanState;
  
  /* ----------------
@@ -877,8 +914,6 @@ typedef struct GroupState
  /* these structs are private in nodeAgg.c: */
  typedef struct AggStatePerAggData *AggStatePerAgg;
  typedef struct AggStatePerGroupData *AggStatePerGroup;
-typedef struct AggHashEntryData *AggHashEntry;
-typedef struct AggHashTableData *AggHashTable;
  
  typedef struct AggState
  {
@@ -894,10 +929,9 @@ typedef struct AggState
         AggStatePerGroup pergroup;      /* per-Aggref-per-group working state */
         HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
         /* these fields are used in AGG_HASHED mode: */
-       AggHashTable hashtable;         /* hash table with one entry per group */
+       TupleHashTable hashtable;       /* hash table with one entry per group */
         bool            table_filled;   /* hash table filled yet? */
-       AggHashEntry next_hash_entry; /* next entry in current chain */
-       int                     next_hash_bucket; /* next chain */
+       TupleHashIterator hashiter;     /* for iterating through hash table */
  } AggState;
  
  /* ----------------
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
src/backend/executor/Makefile		patch \| blob \| blame \| history
src/backend/executor/execGrouping.c	[new file with mode: 0644]	patch \| blob
src/backend/executor/nodeAgg.c		patch \| blob \| blame \| history
src/backend/executor/nodeGroup.c		patch \| blob \| blame \| history
src/backend/executor/nodeHash.c		patch \| blob \| blame \| history
src/backend/executor/nodeSetOp.c		patch \| blob \| blame \| history
src/backend/executor/nodeUnique.c		patch \| blob \| blame \| history
src/include/executor/executor.h		patch \| blob \| blame \| history
src/include/executor/nodeAgg.h		patch \| blob \| blame \| history
src/include/executor/nodeGroup.h		patch \| blob \| blame \| history
src/include/executor/nodeHash.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history