]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Create a new file executor/execGrouping.c to centralize utility routines
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
shared by nodeGroup, nodeAgg, and soon nodeSubplan.

12 files changed:
src/backend/executor/Makefile
src/backend/executor/execGrouping.c [new file with mode: 0644]
src/backend/executor/nodeAgg.c
src/backend/executor/nodeGroup.c
src/backend/executor/nodeHash.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeUnique.c
src/include/executor/executor.h
src/include/executor/nodeAgg.h
src/include/executor/nodeGroup.h
src/include/executor/nodeHash.h
src/include/nodes/execnodes.h

index b875259bc1a6c294301ad982acfa0e8c4ce46f49..7e3f5d2d2c83545b6c2195b91e9934ab6dac772c 100644 (file)
@@ -4,7 +4,7 @@
 #    Makefile for executor
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.19 2002/05/12 23:43:02 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.20 2003/01/10 23:54:24 tgl Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -12,7 +12,7 @@ subdir = src/backend/executor
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = execAmi.o execJunk.o execMain.o \
+OBJS = execAmi.o execGrouping.o execJunk.o execMain.o \
        execProcnode.o execQual.o execScan.o execTuples.o \
        execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o nodeHash.o \
        nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
new file mode 100644 (file)
index 0000000..e3f7720
--- /dev/null
@@ -0,0 +1,369 @@
+/*-------------------------------------------------------------------------
+ *
+ * execGrouping.c
+ *       executor utility routines for grouping, hashing, and aggregation
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "executor/executor.h"
+#include "parser/parse_oper.h"
+#include "utils/memutils.h"
+
+
+/*****************************************************************************
+ *             Utility routines for grouping tuples together
+ *
+ * These routines actually implement SQL's notion of "distinct/not distinct".
+ * Two tuples match if they are not distinct in all the compared columns,
+ * i.e., the column values are either both null, or both non-null and equal.
+ *****************************************************************************/
+
+/*
+ * execTuplesMatch
+ *             Return true if two tuples match in all the indicated fields.
+ *             This is used to detect group boundaries in nodeGroup and nodeAgg,
+ *             and to decide whether two tuples are distinct or not in nodeUnique.
+ *
+ * tuple1, tuple2: the tuples to compare
+ * tupdesc: tuple descriptor applying to both tuples
+ * numCols: the number of attributes to be examined
+ * matchColIdx: array of numCols attribute column numbers to compare
+ * eqfunctions: array of fmgr lookup info for the equality functions, one per matchColIdx entry
+ * evalContext: short-term memory context in which the equality functions run
+ *
+ * NB: evalContext is reset at the start of every call!
+ */
+bool
+execTuplesMatch(HeapTuple tuple1,
+                               HeapTuple tuple2,
+                               TupleDesc tupdesc,
+                               int numCols,
+                               AttrNumber *matchColIdx,
+                               FmgrInfo *eqfunctions,
+                               MemoryContext evalContext)
+{
+       MemoryContext oldContext;
+       bool            result;
+       int                     i;
+
+       /* Reset and switch into the temp context. */
+       MemoryContextReset(evalContext);
+       oldContext = MemoryContextSwitchTo(evalContext);
+
+       /*
+        * We cannot report a match without checking all the fields, but we
+        * can report a non-match as soon as we find unequal fields.  So,
+        * start comparing at the last field (least significant sort key).
+        * That's the most likely to be different if we are dealing with
+        * sorted input.
+        */
+       result = true;
+
+       for (i = numCols; --i >= 0;)
+       {
+               AttrNumber      att = matchColIdx[i];
+               Datum           attr1,
+                                       attr2;
+               bool            isNull1,
+                                       isNull2;
+
+               attr1 = heap_getattr(tuple1,
+                                                        att,
+                                                        tupdesc,
+                                                        &isNull1);
+
+               attr2 = heap_getattr(tuple2,
+                                                        att,
+                                                        tupdesc,
+                                                        &isNull2);
+
+               if (isNull1 != isNull2)
+               {
+                       result = false;         /* one null and one not; they aren't equal */
+                       break;
+               }
+
+               if (isNull1)
+                       continue;                       /* both are null, treat as equal */
+
+               /* Apply the type-specific equality function for column i */
+
+               if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+                                                                               attr1, attr2)))
+               {
+                       result = false;         /* they aren't equal */
+                       break;
+               }
+       }
+
+       MemoryContextSwitchTo(oldContext);      /* back to the caller's context */
+
+       return result;
+}
+
+
+/*
+ * execTuplesMatchPrepare
+ *             Look up the equality functions needed for execTuplesMatch.
+ *             The result is a palloc'd array of numCols entries, one per matchColIdx entry.
+ */
+FmgrInfo *
+execTuplesMatchPrepare(TupleDesc tupdesc,
+                                          int numCols,
+                                          AttrNumber *matchColIdx)
+{
+       FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+       int                     i;
+
+       for (i = 0; i < numCols; i++)
+       {
+               AttrNumber      att = matchColIdx[i];
+               Oid                     typid = tupdesc->attrs[att - 1]->atttypid;      /* att - 1: attribute number to attrs[] index */
+               Oid                     eq_function;
+
+               eq_function = equality_oper_funcid(typid);      /* equality function for the column's type */
+               fmgr_info(eq_function, &eqfunctions[i]);        /* fill in the lookup info for column i */
+       }
+
+       return eqfunctions;
+}
+
+
+/*****************************************************************************
+ *             Utility routines for hashing
+ *****************************************************************************/
+
+/*
+ * ComputeHashFunc
+ *
+ *             the hash function for hash joins (also used for hash aggregation)
+ *             key is the value to hash; typLen and byVal are its type's attlen/attbyval.
+ *             XXX this probably ought to be replaced with datatype-specific
+ *             hash functions, such as those already implemented for hash indexes.
+ */
+uint32
+ComputeHashFunc(Datum key, int typLen, bool byVal)
+{
+       unsigned char *k;
+
+       if (byVal)
+       {
+               /*
+                * If it's a by-value data type, just hash the whole Datum value.
+                * This assumes that datatypes narrower than Datum are
+                * consistently padded (either zero-extended or sign-extended, but
+                * not random bits) to fill Datum; see the XXXGetDatum macros in
+                * postgres.h. NOTE: it would not work to do hash_any(&key, len)
+                * since this would get the wrong bytes on a big-endian machine.
+                */
+               k = (unsigned char *) &key;
+               typLen = sizeof(Datum);
+       }
+       else
+       {
+               if (typLen > 0)
+               {
+                       /* fixed-width pass-by-reference type: hash typLen bytes at the pointer */
+                       k = (unsigned char *) DatumGetPointer(key);
+               }
+               else if (typLen == -1)
+               {
+                       /*
+                        * It's a varlena type, so 'key' points to a "struct varlena".
+                        * NOTE: VARSIZE returns the "real" data length plus the
+                        * sizeof the "vl_len" attribute of varlena (the length
+                        * information). 'key' points to the beginning of the varlena
+                        * struct, so we have to use "VARDATA" to find the beginning
+                        * of the "real" data.  Also, we have to be careful to detoast
+                        * the datum if it's toasted.  (We don't worry about freeing
+                        * the detoasted copy; callers run us in a short-lived context,
+                        * e.g. the per-tuple context reset in ExecHashGetBucket.)
+                        */
+                       struct varlena *vkey = PG_DETOAST_DATUM(key);
+
+                       typLen = VARSIZE(vkey) - VARHDRSZ;
+                       k = (unsigned char *) VARDATA(vkey);
+               }
+               else if (typLen == -2)
+               {
+                       /* It's a null-terminated C string; the terminator is hashed too */
+                       typLen = strlen(DatumGetCString(key)) + 1;
+                       k = (unsigned char *) DatumGetPointer(key);
+               }
+               else
+               {
+                       elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
+                       k = NULL;                       /* keep compiler quiet */
+               }
+       }
+
+       return DatumGetUInt32(hash_any(k, typLen));     /* hash typLen bytes starting at k */
+}
+
+
+/*****************************************************************************
+ *             Utility routines for all-in-memory hash tables
+ *
+ * These routines build hash tables for grouping tuples together (eg, for
+ * hash aggregation).  There is one entry for each not-distinct set of tuples
+ * presented.
+ *****************************************************************************/
+
+/*
+ * Construct an empty TupleHashTable
+ *
+ *     numCols, keyColIdx: identify the tuple fields to use as lookup key
+ *     eqfunctions: equality comparison functions to use
+ *     nbuckets: number of buckets to make (must be > 0)
+ *     entrysize: size of each entry (at least sizeof(TupleHashEntryData))
+ *     tablecxt: memory context in which to store table and table entries
+ *     tempcxt: short-lived context for evaluating hash and comparison functions
+ *
+ * The eqfunctions array may be made with execTuplesMatchPrepare().
+ *
+ * Note that keyColIdx and eqfunctions must be allocated in storage that
+ * will live as long as the hashtable does; only the pointers are stored.
+ */
+TupleHashTable
+BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                                       FmgrInfo *eqfunctions,
+                                       int nbuckets, Size entrysize,
+                                       MemoryContext tablecxt, MemoryContext tempcxt)
+{
+       TupleHashTable  hashtable;
+       Size                    tabsize;
+
+       Assert(nbuckets > 0);
+       Assert(entrysize >= sizeof(TupleHashEntryData));
+
+       tabsize = sizeof(TupleHashTableData) +
+               (nbuckets - 1) * sizeof(TupleHashEntry);        /* NOTE(review): "- 1" presumably because the struct declares buckets[1] -- confirm */
+       hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize); /* zeroed allocation in tablecxt */
+
+       hashtable->numCols = numCols;
+       hashtable->keyColIdx = keyColIdx;
+       hashtable->eqfunctions = eqfunctions;
+       hashtable->tablecxt = tablecxt;
+       hashtable->tempcxt = tempcxt;
+       hashtable->entrysize = entrysize;
+       hashtable->nbuckets = nbuckets;
+
+       return hashtable;
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple (the tuple and its descriptor are taken from 'slot').
+ *
+ * On return, *isnew is true if the entry is newly created, false if it
+ * existed already.  Any extra space in a new entry has been zeroed.
+ */
+TupleHashEntry
+LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+                                        bool *isnew)
+{
+       int                     numCols = hashtable->numCols;
+       AttrNumber *keyColIdx = hashtable->keyColIdx;
+       HeapTuple       tuple = slot->val;
+       TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
+       uint32          hashkey = 0;
+       int                     i;
+       int                     bucketno;
+       TupleHashEntry entry;
+       MemoryContext oldContext;
+
+       /* Need to run the hash function in short-lived context */
+       oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+       for (i = 0; i < numCols; i++)
+       {
+               AttrNumber      att = keyColIdx[i];
+               Datum           attr;
+               bool            isNull;
+
+               /* rotate hashkey left 1 bit at each step */
+               hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+               attr = heap_getattr(tuple, att, tupdesc, &isNull);
+               if (isNull)
+                       continue;                       /* treat nulls as having hash key 0 */
+               hashkey ^= ComputeHashFunc(attr,
+                                                                  (int) tupdesc->attrs[att - 1]->attlen,
+                                                                  tupdesc->attrs[att - 1]->attbyval);
+       }
+       bucketno = hashkey % (uint32) hashtable->nbuckets;
+
+       for (entry = hashtable->buckets[bucketno];
+                entry != NULL;
+                entry = entry->next)
+       {
+               /* Quick check using hashkey: skip entries whose full hash differs */
+               if (entry->hashkey != hashkey)
+                       continue;
+               if (execTuplesMatch(entry->firstTuple,
+                                                       tuple,
+                                                       tupdesc,
+                                                       numCols, keyColIdx,
+                                                       hashtable->eqfunctions,
+                                                       hashtable->tempcxt))    /* NB: execTuplesMatch resets tempcxt */
+               {
+                       MemoryContextSwitchTo(oldContext);
+                       *isnew = false;
+                       return entry;
+               }
+       }
+
+       /* Not there, so build a new one in the table's own context */
+       MemoryContextSwitchTo(hashtable->tablecxt);
+
+       entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+
+       entry->hashkey = hashkey;
+       entry->firstTuple = heap_copytuple(tuple);      /* copy is made while in tablecxt */
+
+       entry->next = hashtable->buckets[bucketno];     /* link in at the head of the bucket chain */
+       hashtable->buckets[bucketno] = entry;
+
+       MemoryContextSwitchTo(oldContext);
+
+       *isnew = true;
+
+       return entry;
+}
+
+/*
+ * Walk through all the entries of a hash table, in no special order.
+ * Returns NULL when no more entries remain.
+ *
+ * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ */
+TupleHashEntry
+ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+{
+       TupleHashEntry  entry;
+
+       entry = state->next_entry;
+       while (entry == NULL)           /* current chain exhausted: move on to the next bucket */
+       {
+               if (state->next_bucket >= hashtable->nbuckets)
+               {
+                       /* No more entries in hashtable, so done */
+                       return NULL;
+               }
+               entry = hashtable->buckets[state->next_bucket++];       /* may be NULL for an empty bucket */
+       }
+       state->next_entry = entry->next;        /* remember the successor for the next call */
+
+       return entry;
+}
index 769e88a839779347305416088488ba8e20ce24fa..d8eeae15ad7f5bf7e2f7fdb52f47175accff3458 100644 (file)
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.101 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.102 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,8 +57,6 @@
 #include "catalog/pg_operator.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
-#include "executor/nodeGroup.h"
-#include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "parser/parse_coerce.h"
@@ -182,21 +180,15 @@ typedef struct AggStatePerGroupData
  * distinct set of GROUP BY column values.  We compute the hash key from
  * the GROUP BY columns.
  */
+typedef struct AggHashEntryData *AggHashEntry;
+
 typedef struct AggHashEntryData
 {
-       AggHashEntry    next;           /* next entry in same hash bucket */
-       uint32          hashkey;                /* exact hash key of this entry */
-       HeapTuple       firstTuple;             /* copy of first tuple in this group */
+       TupleHashEntryData shared;      /* common header for hash table entries */
        /* per-aggregate transition status array - must be last! */
        AggStatePerGroupData pergroup[1];       /* VARIABLE LENGTH ARRAY */
 } AggHashEntryData;                            /* VARIABLE LENGTH STRUCT */
 
-typedef struct AggHashTableData
-{
-       int                     nbuckets;               /* number of buckets in hash table */
-       AggHashEntry buckets[1];        /* VARIABLE LENGTH ARRAY */
-} AggHashTableData;                            /* VARIABLE LENGTH STRUCT */
-
 
 static void initialize_aggregates(AggState *aggstate,
                                                                  AggStatePerAgg peragg,
@@ -578,18 +570,22 @@ static void
 build_hash_table(AggState *aggstate)
 {
        Agg                        *node = (Agg *) aggstate->ss.ps.plan;
-       AggHashTable    hashtable;
-       Size                    tabsize;
+       MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
+       Size                    entrysize;
 
        Assert(node->aggstrategy == AGG_HASHED);
        Assert(node->numGroups > 0);
-       tabsize = sizeof(AggHashTableData) +
-               (node->numGroups - 1) * sizeof(AggHashEntry);
-       hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext,
-                                                                                                 tabsize);
-       MemSet(hashtable, 0, tabsize);
-       hashtable->nbuckets = node->numGroups;
-       aggstate->hashtable = hashtable;
+
+       entrysize = sizeof(AggHashEntryData) +
+               (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
+
+       aggstate->hashtable = BuildTupleHashTable(node->numCols,
+                                                                                         node->grpColIdx,
+                                                                                         aggstate->eqfunctions,
+                                                                                         node->numGroups,
+                                                                                         entrysize,
+                                                                                         aggstate->aggcontext,
+                                                                                         tmpmem);
 }
 
 /*
@@ -601,75 +597,19 @@ build_hash_table(AggState *aggstate)
 static AggHashEntry
 lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
 {
-       Agg                *node = (Agg *) aggstate->ss.ps.plan;
-       AggHashTable hashtable = aggstate->hashtable;
-       MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
-       HeapTuple       tuple = slot->val;
-       TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
-       uint32          hashkey = 0;
-       int                     i;
-       int                     bucketno;
-       AggHashEntry    entry;
-       MemoryContext oldContext;
-       Size            entrysize;
-
-       /* Need to run the hash function in short-lived context */
-       oldContext = MemoryContextSwitchTo(tmpmem);
-
-       for (i = 0; i < node->numCols; i++)
-       {
-               AttrNumber      att = node->grpColIdx[i];
-               Datum           attr;
-               bool            isNull;
+       AggHashEntry entry;
+       bool            isnew;
 
-               /* rotate hashkey left 1 bit at each step */
-               hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+       entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
+                                                                                               slot,
+                                                                                               &isnew);
 
-               attr = heap_getattr(tuple, att, tupdesc, &isNull);
-               if (isNull)
-                       continue;                       /* treat nulls as having hash key 0 */
-               hashkey ^= ComputeHashFunc(attr,
-                                                                  (int) tupdesc->attrs[att - 1]->attlen,
-                                                                  tupdesc->attrs[att - 1]->attbyval);
-       }
-       bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-       for (entry = hashtable->buckets[bucketno];
-                entry != NULL;
-                entry = entry->next)
+       if (isnew)
        {
-               /* Quick check using hashkey */
-               if (entry->hashkey != hashkey)
-                       continue;
-               if (execTuplesMatch(entry->firstTuple,
-                                                       tuple,
-                                                       tupdesc,
-                                                       node->numCols, node->grpColIdx,
-                                                       aggstate->eqfunctions,
-                                                       tmpmem))
-               {
-                       MemoryContextSwitchTo(oldContext);
-                       return entry;
-               }
+               /* initialize aggregates for new tuple group */
+               initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
        }
 
-       /* Not there, so build a new one */
-       MemoryContextSwitchTo(aggstate->aggcontext);
-       entrysize = sizeof(AggHashEntryData) +
-               (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
-       entry = (AggHashEntry) palloc0(entrysize);
-
-       entry->hashkey = hashkey;
-       entry->firstTuple = heap_copytuple(tuple);
-
-       entry->next = hashtable->buckets[bucketno];
-       hashtable->buckets[bucketno] = entry;
-
-       MemoryContextSwitchTo(oldContext);
-
-       /* initialize aggregates for new tuple group */
-       initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
-
        return entry;
 }
 
@@ -964,8 +904,7 @@ agg_fill_hash_table(AggState *aggstate)
 
        aggstate->table_filled = true;
        /* Initialize to walk the hash table */
-       aggstate->next_hash_entry = NULL;
-       aggstate->next_hash_bucket = 0;
+       ResetTupleHashIterator(&aggstate->hashiter);
 }
 
 /*
@@ -980,7 +919,7 @@ agg_retrieve_hash_table(AggState *aggstate)
        bool       *aggnulls;
        AggStatePerAgg peragg;
        AggStatePerGroup pergroup;
-       AggHashTable    hashtable;
+       TupleHashTable  hashtable;
        AggHashEntry    entry;
        TupleTableSlot *firstSlot;
        TupleTableSlot *resultSlot;
@@ -1010,18 +949,14 @@ agg_retrieve_hash_table(AggState *aggstate)
                /*
                 * Find the next entry in the hash table
                 */
-               entry = aggstate->next_hash_entry;
-               while (entry == NULL)
+               entry = (AggHashEntry) ScanTupleHashTable(hashtable,
+                                                                                                 &aggstate->hashiter);
+               if (entry == NULL)
                {
-                       if (aggstate->next_hash_bucket >= hashtable->nbuckets)
-                       {
-                               /* No more entries in hashtable, so done */
-                               aggstate->agg_done = TRUE;
-                               return NULL;
-                       }
-                       entry = hashtable->buckets[aggstate->next_hash_bucket++];
+                       /* No more entries in hashtable, so done */
+                       aggstate->agg_done = TRUE;
+                       return NULL;
                }
-               aggstate->next_hash_entry = entry->next;
 
                /*
                 * Clear the per-output-tuple context for each group
@@ -1032,7 +967,7 @@ agg_retrieve_hash_table(AggState *aggstate)
                 * Store the copied first input tuple in the tuple table slot
                 * reserved for it, so that it can be used in ExecProject.
                 */
-               ExecStoreTuple(entry->firstTuple,
+               ExecStoreTuple(entry->shared.firstTuple,
                                           firstSlot,
                                           InvalidBuffer,
                                           false);
@@ -1187,6 +1122,17 @@ ExecInitAgg(Agg *node, EState *estate)
                numaggs = 1;
        }
 
+       /*
+        * If we are grouping, precompute fmgr lookup data for inner loop
+        */
+       if (node->numCols > 0)
+       {
+               aggstate->eqfunctions =
+                       execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
+                                                                  node->numCols,
+                                                                  node->grpColIdx);
+       }
+
        /*
         * Set up aggregate-result storage in the output expr context, and also
         * allocate my private per-agg working storage
@@ -1211,17 +1157,6 @@ ExecInitAgg(Agg *node, EState *estate)
                aggstate->pergroup = pergroup;
        }
 
-       /*
-        * If we are grouping, precompute fmgr lookup data for inner loop
-        */
-       if (node->numCols > 0)
-       {
-               aggstate->eqfunctions =
-                       execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
-                                                                  node->numCols,
-                                                                  node->grpColIdx);
-       }
-
        /*
         * Perform lookups of aggregate function info, and initialize the
         * unchanging fields of the per-agg data
index 58f6c1b34e906d97f156ecb826ce745b571121a7..b480e388a2119aef3ec0beb7b41306236d92c5c0 100644 (file)
@@ -15,7 +15,7 @@
  *       locate group boundaries.
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.53 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.54 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/heapam.h"
-#include "catalog/pg_operator.h"
 #include "executor/executor.h"
 #include "executor/nodeGroup.h"
-#include "parser/parse_oper.h"
-#include "utils/builtins.h"
-#include "utils/lsyscache.h"
-#include "utils/syscache.h"
 
 
 /*
@@ -241,116 +236,3 @@ ExecReScanGroup(GroupState *node, ExprContext *exprCtxt)
                ((PlanState *) node)->lefttree->chgParam == NULL)
                ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
 }
-
-/*****************************************************************************
- *             Code shared with nodeUnique.c and nodeAgg.c
- *****************************************************************************/
-
-/*
- * execTuplesMatch
- *             Return true if two tuples match in all the indicated fields.
- *             This is used to detect group boundaries in nodeGroup and nodeAgg,
- *             and to decide whether two tuples are distinct or not in nodeUnique.
- *
- * tuple1, tuple2: the tuples to compare
- * tupdesc: tuple descriptor applying to both tuples
- * numCols: the number of attributes to be examined
- * matchColIdx: array of attribute column numbers
- * eqFunctions: array of fmgr lookup info for the equality functions to use
- * evalContext: short-term memory context for executing the functions
- *
- * NB: evalContext is reset each time!
- */
-bool
-execTuplesMatch(HeapTuple tuple1,
-                               HeapTuple tuple2,
-                               TupleDesc tupdesc,
-                               int numCols,
-                               AttrNumber *matchColIdx,
-                               FmgrInfo *eqfunctions,
-                               MemoryContext evalContext)
-{
-       MemoryContext oldContext;
-       bool            result;
-       int                     i;
-
-       /* Reset and switch into the temp context. */
-       MemoryContextReset(evalContext);
-       oldContext = MemoryContextSwitchTo(evalContext);
-
-       /*
-        * We cannot report a match without checking all the fields, but we
-        * can report a non-match as soon as we find unequal fields.  So,
-        * start comparing at the last field (least significant sort key).
-        * That's the most likely to be different if we are dealing with
-        * sorted input.
-        */
-       result = true;
-
-       for (i = numCols; --i >= 0;)
-       {
-               AttrNumber      att = matchColIdx[i];
-               Datum           attr1,
-                                       attr2;
-               bool            isNull1,
-                                       isNull2;
-
-               attr1 = heap_getattr(tuple1,
-                                                        att,
-                                                        tupdesc,
-                                                        &isNull1);
-
-               attr2 = heap_getattr(tuple2,
-                                                        att,
-                                                        tupdesc,
-                                                        &isNull2);
-
-               if (isNull1 != isNull2)
-               {
-                       result = false;         /* one null and one not; they aren't equal */
-                       break;
-               }
-
-               if (isNull1)
-                       continue;                       /* both are null, treat as equal */
-
-               /* Apply the type-specific equality function */
-
-               if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
-                                                                               attr1, attr2)))
-               {
-                       result = false;         /* they aren't equal */
-                       break;
-               }
-       }
-
-       MemoryContextSwitchTo(oldContext);
-
-       return result;
-}
-
-/*
- * execTuplesMatchPrepare
- *             Look up the equality functions needed for execTuplesMatch.
- *             The result is a palloc'd array.
- */
-FmgrInfo *
-execTuplesMatchPrepare(TupleDesc tupdesc,
-                                          int numCols,
-                                          AttrNumber *matchColIdx)
-{
-       FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
-       int                     i;
-
-       for (i = 0; i < numCols; i++)
-       {
-               AttrNumber      att = matchColIdx[i];
-               Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
-               Oid                     eq_function;
-
-               eq_function = equality_oper_funcid(typid);
-               fmgr_info(eq_function, &eqfunctions[i]);
-       }
-
-       return eqfunctions;
-}
index bea89630993d372a4bc553a07e001e2aafcebb28..31152a3d8552d770570417c0a3060e16b1caa21d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.74 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
  */
 #include "postgres.h"
 
-#include <limits.h>
-#include <math.h>
-
-#include "access/hash.h"
 #include "executor/execdebug.h"
 #include "executor/nodeHash.h"
 #include "executor/nodeHashjoin.h"
@@ -642,74 +638,6 @@ ExecScanHashBucket(HashJoinState *hjstate,
        return NULL;
 }
 
-/* ----------------------------------------------------------------
- *             ComputeHashFunc
- *
- *             the hash function for hash joins (also used for hash aggregation)
- *             'key' is the Datum to hash; typLen and byVal describe its datatype.
- *             XXX this probably ought to be replaced with datatype-specific
- *             hash functions, such as those already implemented for hash indexes.
- * ----------------------------------------------------------------
- */
-uint32
-ComputeHashFunc(Datum key, int typLen, bool byVal)
-{
-       unsigned char *k;
-
-       if (byVal)
-       {
-               /*
-                * If it's a by-value data type, just hash the whole Datum value.
-                * This assumes that datatypes narrower than Datum are
-                * consistently padded (either zero-extended or sign-extended, but
-                * not random bits) to fill Datum; see the XXXGetDatum macros in
-                * postgres.h. NOTE: it would not work to do hash_any(&key, len)
-                * since this would get the wrong bytes on a big-endian machine.
-                */
-               k = (unsigned char *) &key;
-               typLen = sizeof(Datum);         /* caller's typLen is overridden here */
-       }
-       else
-       {
-               if (typLen > 0)
-               {
-                       /* fixed-width pass-by-reference type: hash typLen bytes at the pointer */
-                       k = (unsigned char *) DatumGetPointer(key);
-               }
-               else if (typLen == -1)
-               {
-                       /*
-                        * It's a varlena type, so 'key' points to a "struct varlena".
-                        * NOTE: VARSIZE returns the "real" data length plus the
-                        * sizeof the "vl_len" attribute of varlena (the length
-                        * information). 'key' points to the beginning of the varlena
-                        * struct, so we have to use "VARDATA" to find the beginning
-                        * of the "real" data.  Also, we have to be careful to detoast
-                        * the datum if it's toasted.  (We don't worry about freeing
-                        * the detoasted copy; that happens for free when the
-                        * per-tuple memory context is reset in ExecHashGetBucket.)
-                        */
-                       struct varlena *vkey = PG_DETOAST_DATUM(key);
-
-                       typLen = VARSIZE(vkey) - VARHDRSZ;      /* data length without the header */
-                       k = (unsigned char *) VARDATA(vkey);
-               }
-               else if (typLen == -2)
-               {
-                       /* It's a null-terminated C string; the terminator is hashed too */
-                       typLen = strlen(DatumGetCString(key)) + 1;
-                       k = (unsigned char *) DatumGetPointer(key);
-               }
-               else
-               {
-                       elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
-                       k = NULL;                       /* keep compiler quiet */
-               }
-       }
-
-       return DatumGetUInt32(hash_any(k, typLen));
-}
-
 /* ----------------------------------------------------------------
  *             ExecHashTableReset
  *
index 965a2a6466aff80572eba0189e56853e972b1d21..3946cd00246108f2fa2f85345ce83be732c71323 100644 (file)
@@ -21,7 +21,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.8 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.9 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,9 +36,9 @@
 
 #include "access/heapam.h"
 #include "executor/executor.h"
-#include "executor/nodeGroup.h"
 #include "executor/nodeSetOp.h"
 
+
 /* ----------------------------------------------------------------
  *             ExecSetOp
  * ----------------------------------------------------------------
index 415594f92c4ddc2a6d6a9a18142a8fba04769bcd..7a0ccb0b14c8ce01b8bc3f77fc8dd5a4f08051ec 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.36 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.37 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,9 +27,9 @@
 
 #include "access/heapam.h"
 #include "executor/executor.h"
-#include "executor/nodeGroup.h"
 #include "executor/nodeUnique.h"
 
+
 /* ----------------------------------------------------------------
  *             ExecUnique
  *
index 31cc2107536aa7a096ef948ae4bf3e29f186d248..fb300fc044339990234ecccb70e8ccc57dcda5d5 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: executor.h,v 1.85 2002/12/15 21:01:34 tgl Exp $
+ * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,6 +36,31 @@ extern void ExecMarkPos(PlanState *node);
 extern void ExecRestrPos(PlanState *node);
 extern bool ExecSupportsMarkRestore(NodeTag plantype);
 
+/*
+ * prototypes from functions in execGrouping.c (tuple matching, hashing, tuple hash tables)
+ */
+extern bool execTuplesMatch(HeapTuple tuple1,
+                               HeapTuple tuple2,
+                               TupleDesc tupdesc,
+                               int numCols,
+                               AttrNumber *matchColIdx,
+                               FmgrInfo *eqfunctions,
+                               MemoryContext evalContext);
+extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
+                                          int numCols,
+                                          AttrNumber *matchColIdx);
+extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
+extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                                                                                 FmgrInfo *eqfunctions,
+                                                                                 int nbuckets, Size entrysize,
+                                                                                 MemoryContext tablecxt,
+                                                                                 MemoryContext tempcxt);
+extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
+                                                                                  TupleTableSlot *slot,
+                                                                                  bool *isnew);
+extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
+                                                                                TupleHashIterator *state);
+
+
 /*
  * prototypes from functions in execJunk.c
  */
index 036d67ccaadf0ba9ef6f5bd8c0f419797204cef2..a2817306da0adc0e8ae20c453e13323139e091a2 100644 (file)
@@ -1,13 +1,13 @@
 /*-------------------------------------------------------------------------
  *
  * nodeAgg.h
- *
+ *       prototypes for nodeAgg.c
  *
  *
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeAgg.h,v 1.18 2002/12/05 15:50:36 tgl Exp $
+ * $Id: nodeAgg.h,v 1.19 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
index 211e55b6cadfe3a0bb80aef808a0b9a14a5aab6b..2a6b733c9d2a4314856f04f7c9a937b597b6f5c2 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeGroup.h,v 1.23 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeGroup.h,v 1.24 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,15 +22,4 @@ extern TupleTableSlot *ExecGroup(GroupState *node);
 extern void ExecEndGroup(GroupState *node);
 extern void ExecReScanGroup(GroupState *node, ExprContext *exprCtxt);
 
-extern bool execTuplesMatch(HeapTuple tuple1,
-                               HeapTuple tuple2,
-                               TupleDesc tupdesc,
-                               int numCols,
-                               AttrNumber *matchColIdx,
-                               FmgrInfo *eqfunctions,
-                               MemoryContext evalContext);
-extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
-                                          int numCols,
-                                          AttrNumber *matchColIdx);
-
 #endif   /* NODEGROUP_H */
index 02e56355263b66272a814d93bc2877a976d7f2b2..da1113b32daf13005270432d1c6ad24805b903f9 100644 (file)
@@ -1,13 +1,13 @@
 /*-------------------------------------------------------------------------
  *
  * nodeHash.h
- *
+ *       prototypes for nodeHash.c
  *
  *
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
+ * $Id: nodeHash.h,v 1.29 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,5 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
                                                int *virtualbuckets,
                                                int *physicalbuckets,
                                                int *numbatches);
-extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
 
 #endif   /* NODEHASH_H */
index 1ce0635c632c7540c28a6d07b6150e11ab647d7c..9c43660c610ede090e2e6d3e9d92b866ff509101 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.89 2003/01/10 21:08:15 tgl Exp $
+ * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -324,6 +324,46 @@ typedef struct EState
 } EState;
 
 
+/* ----------------------------------------------------------------
+ *                              Tuple Hash Tables
+ *
+ * All-in-memory tuple hash tables are used for a number of purposes (see AggState, SubPlanState).
+ * ----------------------------------------------------------------
+ */
+typedef struct TupleHashEntryData *TupleHashEntry;     /* pointer to one table entry */
+typedef struct TupleHashTableData *TupleHashTable;     /* pointer to a whole table */
+
+typedef struct TupleHashEntryData
+{
+       TupleHashEntry next;            /* next entry in same hash bucket (chain link) */
+       uint32          hashkey;                /* exact hash key of this entry */
+       HeapTuple       firstTuple;             /* copy of first tuple in this group */
+       /* more data may follow this struct; see entrysize in TupleHashTableData */
+} TupleHashEntryData;                  /* VARIABLE LENGTH STRUCT */
+
+typedef struct TupleHashTableData
+{
+       int                     numCols;                /* number of columns in lookup key */
+       AttrNumber *keyColIdx;          /* attr numbers of key columns (presumably numCols of them) */
+       FmgrInfo   *eqfunctions;        /* lookup data for comparison functions */
+       MemoryContext tablecxt;         /* memory context containing table */
+       MemoryContext tempcxt;          /* context for function evaluations */
+       Size            entrysize;              /* actual size to make each hash entry (entries may carry extra data) */
+       int                     nbuckets;               /* number of buckets in hash table */
+       TupleHashEntry buckets[1];      /* VARIABLE LENGTH ARRAY of bucket chains; presumably nbuckets long */
+} TupleHashTableData;                  /* VARIABLE LENGTH STRUCT */
+
+typedef struct
+{
+       TupleHashEntry next_entry;      /* next entry in current chain; NULL after ResetTupleHashIterator */
+       int                     next_bucket;    /* next chain; 0 after ResetTupleHashIterator */
+} TupleHashIterator;
+/* Put iter in its initial state (no pending entry, bucket 0).  NB: iter is evaluated twice. */
+#define ResetTupleHashIterator(iter) \
+       ((iter)->next_entry = NULL, \
+        (iter)->next_bucket = 0)
+
+
 /* ----------------------------------------------------------------
  *                              Expression State Trees
  *
@@ -445,9 +485,6 @@ typedef struct BoolExprState
  *             SubPlanState node
  * ----------------
  */
-/* this struct is private in nodeSubplan.c: */
-typedef struct SubPlanHashTableData *SubPlanHashTable;
-
 typedef struct SubPlanState
 {
        ExprState       xprstate;
@@ -458,8 +495,8 @@ typedef struct SubPlanState
        bool            needShutdown;   /* TRUE = need to shutdown subplan */
        HeapTuple       curTuple;               /* copy of most recent tuple from subplan */
        /* these are used when hashing the subselect's output: */
-       SubPlanHashTable hashtable;     /* hash table for no-nulls subselect rows */
-       SubPlanHashTable hashnulls;     /* hash table for rows with null(s) */
+       TupleHashTable hashtable;       /* hash table for no-nulls subselect rows */
+       TupleHashTable hashnulls;       /* hash table for rows with null(s) */
 } SubPlanState;
 
 /* ----------------
@@ -877,8 +914,6 @@ typedef struct GroupState
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
-typedef struct AggHashEntryData *AggHashEntry;
-typedef struct AggHashTableData *AggHashTable;
 
 typedef struct AggState
 {
@@ -894,10 +929,9 @@ typedef struct AggState
        AggStatePerGroup pergroup;      /* per-Aggref-per-group working state */
        HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
        /* these fields are used in AGG_HASHED mode: */
-       AggHashTable hashtable;         /* hash table with one entry per group */
+       TupleHashTable hashtable;       /* hash table with one entry per group */
        bool            table_filled;   /* hash table filled yet? */
-       AggHashEntry next_hash_entry; /* next entry in current chain */
-       int                     next_hash_bucket; /* next chain */
+       TupleHashIterator hashiter;     /* for iterating through hash table */
 } AggState;
 
 /* ----------------