]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Revert "Fix race in Parallel Hash Join batch cleanup."
authorThomas Munro <tmunro@postgresql.org>
Wed, 17 Mar 2021 11:35:04 +0000 (00:35 +1300)
committerThomas Munro <tmunro@postgresql.org>
Wed, 17 Mar 2021 12:10:55 +0000 (01:10 +1300)
This reverts commit 378802e3713c6c0fce31d2390c134cd5d7c30157.
This reverts commit 3b8981b6e1a2aea0f18384c803e21e9391de669a.

Discussion: https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com

src/backend/executor/nodeHash.c
src/backend/executor/nodeHashjoin.c
src/backend/postmaster/pgstat.c
src/include/executor/hashjoin.h
src/include/pgstat.h

index 98db9c2fb0d61a9715cc14ddcfaa976b2fb4bd18..c5f2d1d22b16a111a6d877221b80318b9e552a0b 100644 (file)
@@ -246,10 +246,10 @@ MultiExecParallelHash(HashState *node)
         */
        pstate = hashtable->parallel_state;
        build_barrier = &pstate->build_barrier;
-       Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATE);
+       Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATING);
        switch (BarrierPhase(build_barrier))
        {
-               case PHJ_BUILD_ALLOCATE:
+               case PHJ_BUILD_ALLOCATING:
 
                        /*
                         * Either I just allocated the initial hash table in
@@ -259,7 +259,7 @@ MultiExecParallelHash(HashState *node)
                        BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ALLOCATE);
                        /* Fall through. */
 
-               case PHJ_BUILD_HASH_INNER:
+               case PHJ_BUILD_HASHING_INNER:
 
                        /*
                         * It's time to begin hashing, or if we just arrived here then
@@ -271,10 +271,10 @@ MultiExecParallelHash(HashState *node)
                         * below.
                         */
                        if (PHJ_GROW_BATCHES_PHASE(BarrierAttach(&pstate->grow_batches_barrier)) !=
-                               PHJ_GROW_BATCHES_ELECT)
+                               PHJ_GROW_BATCHES_ELECTING)
                                ExecParallelHashIncreaseNumBatches(hashtable);
                        if (PHJ_GROW_BUCKETS_PHASE(BarrierAttach(&pstate->grow_buckets_barrier)) !=
-                               PHJ_GROW_BUCKETS_ELECT)
+                               PHJ_GROW_BUCKETS_ELECTING)
                                ExecParallelHashIncreaseNumBuckets(hashtable);
                        ExecParallelHashEnsureBatchAccessors(hashtable);
                        ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -333,22 +333,15 @@ MultiExecParallelHash(HashState *node)
        hashtable->nbuckets = pstate->nbuckets;
        hashtable->log2_nbuckets = my_log2(hashtable->nbuckets);
        hashtable->totalTuples = pstate->total_tuples;
-
-       /*
-        * Unless we're completely done and the batch state has been freed, make
-        * sure we have accessors.
-        */
-       if (BarrierPhase(build_barrier) < PHJ_BUILD_FREE)
-               ExecParallelHashEnsureBatchAccessors(hashtable);
+       ExecParallelHashEnsureBatchAccessors(hashtable);
 
        /*
         * The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE
-        * case, which will bring the build phase to PHJ_BUILD_RUN (if it isn't
+        * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't
         * there already).
         */
-       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
-                  BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
-                  BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
+       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
+                  BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 }
 
 /* ----------------------------------------------------------------
@@ -596,8 +589,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
                 * Attach to the build barrier.  The corresponding detach operation is
                 * in ExecHashTableDetach.  Note that we won't attach to the
                 * batch_barrier for batch 0 yet.  We'll attach later and start it out
-                * in PHJ_BATCH_PROBE phase, because batch 0 is allocated up front and
-                * then loaded while hashing (the standard hybrid hash join
+                * in PHJ_BATCH_PROBING phase, because batch 0 is allocated up front
+                * and then loaded while hashing (the standard hybrid hash join
                 * algorithm), and we'll coordinate that using build_barrier.
                 */
                build_barrier = &pstate->build_barrier;
@@ -610,7 +603,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
                 * SharedHashJoinBatch objects and the hash table for batch 0.  One
                 * backend will be elected to do that now if necessary.
                 */
-               if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECT &&
+               if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECTING &&
                        BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ELECT))
                {
                        pstate->nbatch = nbatch;
@@ -631,7 +624,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
                /*
                 * The next Parallel Hash synchronization point is in
                 * MultiExecParallelHash(), which will progress it all the way to
-                * PHJ_BUILD_RUN.  The caller must not return control from this
+                * PHJ_BUILD_DONE.  The caller must not return control from this
                 * executor node between now and then.
                 */
        }
@@ -1067,7 +1060,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
        ParallelHashJoinState *pstate = hashtable->parallel_state;
        int                     i;
 
-       Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+       Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
 
        /*
         * It's unlikely, but we need to be prepared for new participants to show
@@ -1076,7 +1069,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
         */
        switch (PHJ_GROW_BATCHES_PHASE(BarrierPhase(&pstate->grow_batches_barrier)))
        {
-               case PHJ_GROW_BATCHES_ELECT:
+               case PHJ_GROW_BATCHES_ELECTING:
 
                        /*
                         * Elect one participant to prepare to grow the number of batches.
@@ -1194,13 +1187,13 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
                        }
                        /* Fall through. */
 
-               case PHJ_GROW_BATCHES_REALLOCATE:
+               case PHJ_GROW_BATCHES_ALLOCATING:
                        /* Wait for the above to be finished. */
                        BarrierArriveAndWait(&pstate->grow_batches_barrier,
-                                                                WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE);
+                                                                WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE);
                        /* Fall through. */
 
-               case PHJ_GROW_BATCHES_REPARTITION:
+               case PHJ_GROW_BATCHES_REPARTITIONING:
                        /* Make sure that we have the current dimensions and buckets. */
                        ExecParallelHashEnsureBatchAccessors(hashtable);
                        ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -1213,7 +1206,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
                                                                 WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION);
                        /* Fall through. */
 
-               case PHJ_GROW_BATCHES_DECIDE:
+               case PHJ_GROW_BATCHES_DECIDING:
 
                        /*
                         * Elect one participant to clean up and decide whether further
@@ -1268,7 +1261,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
                        }
                        /* Fall through. */
 
-               case PHJ_GROW_BATCHES_FINISH:
+               case PHJ_GROW_BATCHES_FINISHING:
                        /* Wait for the above to complete. */
                        BarrierArriveAndWait(&pstate->grow_batches_barrier,
                                                                 WAIT_EVENT_HASH_GROW_BATCHES_FINISH);
@@ -1508,7 +1501,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
        HashMemoryChunk chunk;
        dsa_pointer chunk_s;
 
-       Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+       Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
 
        /*
         * It's unlikely, but we need to be prepared for new participants to show
@@ -1517,7 +1510,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
         */
        switch (PHJ_GROW_BUCKETS_PHASE(BarrierPhase(&pstate->grow_buckets_barrier)))
        {
-               case PHJ_GROW_BUCKETS_ELECT:
+               case PHJ_GROW_BUCKETS_ELECTING:
                        /* Elect one participant to prepare to increase nbuckets. */
                        if (BarrierArriveAndWait(&pstate->grow_buckets_barrier,
                                                                         WAIT_EVENT_HASH_GROW_BUCKETS_ELECT))
@@ -1546,13 +1539,13 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
                        }
                        /* Fall through. */
 
-               case PHJ_GROW_BUCKETS_REALLOCATE:
+               case PHJ_GROW_BUCKETS_ALLOCATING:
                        /* Wait for the above to complete. */
                        BarrierArriveAndWait(&pstate->grow_buckets_barrier,
-                                                                WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE);
+                                                                WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE);
                        /* Fall through. */
 
-               case PHJ_GROW_BUCKETS_REINSERT:
+               case PHJ_GROW_BUCKETS_REINSERTING:
                        /* Reinsert all tuples into the hash table. */
                        ExecParallelHashEnsureBatchAccessors(hashtable);
                        ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -1708,7 +1701,7 @@ retry:
 
                /* Try to load it into memory. */
                Assert(BarrierPhase(&hashtable->parallel_state->build_barrier) ==
-                          PHJ_BUILD_HASH_INNER);
+                          PHJ_BUILD_HASHING_INNER);
                hashTuple = ExecParallelHashTupleAlloc(hashtable,
                                                                                           HJTUPLE_OVERHEAD + tuple->t_len,
                                                                                           &shared);
@@ -2862,7 +2855,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size,
        if (pstate->growth != PHJ_GROWTH_DISABLED)
        {
                Assert(curbatch == 0);
-               Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+               Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
 
                /*
                 * Check if our space limit would be exceeded.  To avoid choking on
@@ -2982,7 +2975,7 @@ ExecParallelHashJoinSetUpBatches(HashJoinTable hashtable, int nbatch)
                {
                        /* Batch 0 doesn't need to be loaded. */
                        BarrierAttach(&shared->batch_barrier);
-                       while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBE)
+                       while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBING)
                                BarrierArriveAndWait(&shared->batch_barrier, 0);
                        BarrierDetach(&shared->batch_barrier);
                }
@@ -3055,11 +3048,14 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable)
        }
 
        /*
-        * We should never see a state where the batch-tracking array is freed,
-        * because we should have given up sooner if we join when the build
-        * barrier has reached the PHJ_BUILD_FREE phase.
+        * It's possible for a backend to start up very late so that the whole
+        * join is finished and the shm state for tracking batches has already
+        * been freed by ExecHashTableDetach().  In that case we'll just leave
+        * hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives
+        * up early.
         */
-       Assert(DsaPointerIsValid(pstate->batches));
+       if (!DsaPointerIsValid(pstate->batches))
+               return;
 
        /* Use hash join memory context. */
        oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
@@ -3140,7 +3136,7 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
                         * longer attached, but since there is no way it's moving after
                         * this point it seems safe to make the following assertion.
                         */
-                       Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_FREE);
+                       Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_DONE);
 
                        /* Free shared chunks and buckets. */
                        while (DsaPointerIsValid(batch->chunks))
@@ -3179,17 +3175,9 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
 void
 ExecHashTableDetach(HashJoinTable hashtable)
 {
-       ParallelHashJoinState *pstate = hashtable->parallel_state;
-
-       /*
-        * If we're involved in a parallel query, we must either have got all the
-        * way to PHJ_BUILD_RUN, or joined too late and be in PHJ_BUILD_FREE.
-        */
-       Assert(!pstate ||
-                  BarrierPhase(&pstate->build_barrier) >= PHJ_BUILD_RUN);
-
-       if (pstate && BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_RUN)
+       if (hashtable->parallel_state)
        {
+               ParallelHashJoinState *pstate = hashtable->parallel_state;
                int                     i;
 
                /* Make sure any temporary files are closed. */
@@ -3205,22 +3193,17 @@ ExecHashTableDetach(HashJoinTable hashtable)
                }
 
                /* If we're last to detach, clean up shared memory. */
-               if (BarrierArriveAndDetach(&pstate->build_barrier))
+               if (BarrierDetach(&pstate->build_barrier))
                {
-                       /*
-                        * Late joining processes will see this state and give up
-                        * immediately.
-                        */
-                       Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_FREE);
-
                        if (DsaPointerIsValid(pstate->batches))
                        {
                                dsa_free(hashtable->area, pstate->batches);
                                pstate->batches = InvalidDsaPointer;
                        }
                }
+
+               hashtable->parallel_state = NULL;
        }
-       hashtable->parallel_state = NULL;
 }
 
 /*
index 3b1553fefefe5d1de5e7c0148f03a6e5c8bf3877..510bdd39adc39f9eaff39901e7b7a6304873c8ad 100644 (file)
  *
  * One barrier called build_barrier is used to coordinate the hashing phases.
  * The phase is represented by an integer which begins at zero and increments
- * one by one, but in the code it is referred to by symbolic names as follows.
- * An asterisk indicates a phase that is performed by a single arbitrarily
- * chosen process.
+ * one by one, but in the code it is referred to by symbolic names as follows:
  *
- *   PHJ_BUILD_ELECT                 -- initial state
- *   PHJ_BUILD_ALLOCATE*             -- one sets up the batches and table 0
- *   PHJ_BUILD_HASH_INNER            -- all hash the inner rel
- *   PHJ_BUILD_HASH_OUTER            -- (multi-batch only) all hash the outer
- *   PHJ_BUILD_RUN                   -- building done, probing can begin
- *   PHJ_BUILD_FREE*                 -- all work complete, one frees batches
+ *   PHJ_BUILD_ELECTING              -- initial state
+ *   PHJ_BUILD_ALLOCATING            -- one sets up the batches and table 0
+ *   PHJ_BUILD_HASHING_INNER         -- all hash the inner rel
+ *   PHJ_BUILD_HASHING_OUTER         -- (multi-batch only) all hash the outer
+ *   PHJ_BUILD_DONE                  -- building done, probing can begin
  *
- * While in the phase PHJ_BUILD_HASH_INNER a separate pair of barriers may
+ * While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may
  * be used repeatedly as required to coordinate expansions in the number of
  * batches or buckets.  Their phases are as follows:
  *
- *   PHJ_GROW_BATCHES_ELECT          -- initial state
- *   PHJ_GROW_BATCHES_REALLOCATE*    -- one allocates new batches
- *   PHJ_GROW_BATCHES_REPARTITION    -- all repartition
- *   PHJ_GROW_BATCHES_DECIDE*        -- one detects skew and cleans up
- *   PHJ_GROW_BATCHES_FINISH         -- finished one growth cycle
+ *   PHJ_GROW_BATCHES_ELECTING       -- initial state
+ *   PHJ_GROW_BATCHES_ALLOCATING     -- one allocates new batches
+ *   PHJ_GROW_BATCHES_REPARTITIONING -- all repartition
+ *   PHJ_GROW_BATCHES_FINISHING      -- one cleans up, detects skew
  *
- *   PHJ_GROW_BUCKETS_ELECT          -- initial state
- *   PHJ_GROW_BUCKETS_REALLOCATE*    -- one allocates new buckets
- *   PHJ_GROW_BUCKETS_REINSERT       -- all insert tuples
+ *   PHJ_GROW_BUCKETS_ELECTING       -- initial state
+ *   PHJ_GROW_BUCKETS_ALLOCATING     -- one allocates new buckets
+ *   PHJ_GROW_BUCKETS_REINSERTING    -- all insert tuples
  *
  * If the planner got the number of batches and buckets right, those won't be
  * necessary, but on the other hand we might finish up needing to expand the
  * within our memory budget and load factor target.  For that reason it's a
  * separate pair of barriers using circular phases.
  *
- * The PHJ_BUILD_HASH_OUTER phase is required only for multi-batch joins,
+ * The PHJ_BUILD_HASHING_OUTER phase is required only for multi-batch joins,
  * because we need to divide the outer relation into batches up front in order
  * to be able to process batches entirely independently.  In contrast, the
  * parallel-oblivious algorithm simply throws tuples 'forward' to 'later'
  * batches whenever it encounters them while scanning and probing, which it
  * can do because it processes batches in serial order.
  *
- * Once PHJ_BUILD_RUN is reached, backends then split up and process
+ * Once PHJ_BUILD_DONE is reached, backends then split up and process
  * different batches, or gang up and work together on probing batches if there
  * aren't enough to go around.  For each batch there is a separate barrier
  * with the following phases:
  *
- *  PHJ_BATCH_ELECT          -- initial state
- *  PHJ_BATCH_ALLOCATE*      -- one allocates buckets
- *  PHJ_BATCH_LOAD           -- all load the hash table from disk
- *  PHJ_BATCH_PROBE          -- all probe
- *  PHJ_BATCH_FREE*          -- one frees memory
+ *  PHJ_BATCH_ELECTING       -- initial state
+ *  PHJ_BATCH_ALLOCATING     -- one allocates buckets
+ *  PHJ_BATCH_LOADING        -- all load the hash table from disk
+ *  PHJ_BATCH_PROBING        -- all probe
+ *  PHJ_BATCH_DONE           -- end
  *
  * Batch 0 is a special case, because it starts out in phase
- * PHJ_BATCH_PROBE; populating batch 0's hash table is done during
- * PHJ_BUILD_HASH_INNER so we can skip loading.
+ * PHJ_BATCH_PROBING; populating batch 0's hash table is done during
+ * PHJ_BUILD_HASHING_INNER so we can skip loading.
  *
  * Initially we try to plan for a single-batch hash join using the combined
  * hash_mem of all participants to create a large shared hash table.  If that
  *
  * To avoid deadlocks, we never wait for any barrier unless it is known that
  * all other backends attached to it are actively executing the node or have
- * finished.  Practically, that means that we never emit a tuple while attached
- * to a barrier, unless the barrier has reached a phase that means that no
- * process will wait on it again.  We emit tuples while attached to the build
- * barrier in phase PHJ_BUILD_RUN, and to a per-batch barrier in phase
- * PHJ_BATCH_PROBE.  These are advanced to PHJ_BUILD_FREE and PHJ_BATCH_FREE
- * respectively without waiting, using BarrierArriveAndDetach().  The last to
- * detach receives a different return value so that it knows that it's safe to
- * clean up.  Any straggler process that attaches after that phase is reached
- * will see that it's too late to participate or access the relevant shared
- * memory objects.
+ * already arrived.  Practically, that means that we never return a tuple
+ * while attached to a barrier, unless the barrier has reached its final
+ * state.  In the slightly special case of the per-batch barrier, we return
+ * tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use
+ * BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting.
  *
  *-------------------------------------------------------------------------
  */
@@ -325,10 +316,9 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
                                        Barrier    *build_barrier;
 
                                        build_barrier = &parallel_state->build_barrier;
-                                       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
-                                                  BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
-                                                  BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
-                                       if (BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER)
+                                       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
+                                                  BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
+                                       if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
                                        {
                                                /*
                                                 * If multi-batch, we need to hash the outer relation
@@ -339,18 +329,9 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
                                                BarrierArriveAndWait(build_barrier,
                                                                                         WAIT_EVENT_HASH_BUILD_HASH_OUTER);
                                        }
-                                       else if (BarrierPhase(build_barrier) == PHJ_BUILD_FREE)
-                                       {
-                                               /*
-                                                * If we attached so late that the job is finished and
-                                                * the batch state has been freed, we can return
-                                                * immediately.
-                                                */
-                                               return NULL;
-                                       }
+                                       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 
                                        /* Each backend should now select a batch to work on. */
-                                       Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUN);
                                        hashtable->curbatch = -1;
                                        node->hj_JoinState = HJ_NEED_NEW_BATCH;
 
@@ -1109,6 +1090,14 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
        int                     start_batchno;
        int                     batchno;
 
+       /*
+        * If we started up so late that the batch tracking array has been freed
+        * already by ExecHashTableDetach(), then we are finished.  See also
+        * ExecParallelHashEnsureBatchAccessors().
+        */
+       if (hashtable->batches == NULL)
+               return false;
+
        /*
         * If we were already attached to a batch, remember not to bother checking
         * it again, and detach from it (possibly freeing the hash table if we are
@@ -1142,7 +1131,7 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
 
                        switch (BarrierAttach(batch_barrier))
                        {
-                               case PHJ_BATCH_ELECT:
+                               case PHJ_BATCH_ELECTING:
 
                                        /* One backend allocates the hash table. */
                                        if (BarrierArriveAndWait(batch_barrier,
@@ -1150,13 +1139,13 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
                                                ExecParallelHashTableAlloc(hashtable, batchno);
                                        /* Fall through. */
 
-                               case PHJ_BATCH_ALLOCATE:
+                               case PHJ_BATCH_ALLOCATING:
                                        /* Wait for allocation to complete. */
                                        BarrierArriveAndWait(batch_barrier,
                                                                                 WAIT_EVENT_HASH_BATCH_ALLOCATE);
                                        /* Fall through. */
 
-                               case PHJ_BATCH_LOAD:
+                               case PHJ_BATCH_LOADING:
                                        /* Start (or join in) loading tuples. */
                                        ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
                                        inner_tuples = hashtable->batches[batchno].inner_tuples;
@@ -1176,7 +1165,7 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
                                                                                 WAIT_EVENT_HASH_BATCH_LOAD);
                                        /* Fall through. */
 
-                               case PHJ_BATCH_PROBE:
+                               case PHJ_BATCH_PROBING:
 
                                        /*
                                         * This batch is ready to probe.  Return control to
@@ -1186,13 +1175,13 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
                                         * this barrier again (or else a deadlock could occur).
                                         * All attached participants must eventually call
                                         * BarrierArriveAndDetach() so that the final phase
-                                        * PHJ_BATCH_FREE can be reached.
+                                        * PHJ_BATCH_DONE can be reached.
                                         */
                                        ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
                                        sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
                                        return true;
 
-                               case PHJ_BATCH_FREE:
+                               case PHJ_BATCH_DONE:
 
                                        /*
                                         * Already done.  Detach and go around again (if any
@@ -1519,7 +1508,7 @@ ExecHashJoinReInitializeDSM(HashJoinState *state, ParallelContext *cxt)
        /*
         * It would be possible to reuse the shared hash table in single-batch
         * cases by resetting and then fast-forwarding build_barrier to
-        * PHJ_BUILD_FREE and batch 0's batch_barrier to PHJ_BATCH_PROBE, but
+        * PHJ_BUILD_DONE and batch 0's batch_barrier to PHJ_BATCH_PROBING, but
         * currently shared hash tables are already freed by now (by the last
         * participant to detach from the batch).  We could consider keeping it
         * around for single-batch joins.  We'd also need to adjust
@@ -1538,7 +1527,7 @@ ExecHashJoinReInitializeDSM(HashJoinState *state, ParallelContext *cxt)
        /* Clear any shared batch files. */
        SharedFileSetDeleteAll(&pstate->fileset);
 
-       /* Reset build_barrier to PHJ_BUILD_ELECT so we can go around again. */
+       /* Reset build_barrier to PHJ_BUILD_ELECTING so we can go around again. */
        BarrierInit(&pstate->build_barrier, 0);
 }
 
index c374e2c6c82b30646ed2ef8fc592c0986080acd3..b1e2d94951d3e0290ee7b407d9e1414dad7a45d7 100644 (file)
@@ -4043,8 +4043,8 @@ pgstat_get_wait_ipc(WaitEventIPC w)
                case WAIT_EVENT_HASH_BUILD_HASH_OUTER:
                        event_name = "HashBuildHashOuter";
                        break;
-               case WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE:
-                       event_name = "HashGrowBatchesReallocate";
+               case WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE:
+                       event_name = "HashGrowBatchesAllocate";
                        break;
                case WAIT_EVENT_HASH_GROW_BATCHES_DECIDE:
                        event_name = "HashGrowBatchesDecide";
@@ -4058,8 +4058,8 @@ pgstat_get_wait_ipc(WaitEventIPC w)
                case WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION:
                        event_name = "HashGrowBatchesRepartition";
                        break;
-               case WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE:
-                       event_name = "HashGrowBucketsReallocate";
+               case WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE:
+                       event_name = "HashGrowBucketsAllocate";
                        break;
                case WAIT_EVENT_HASH_GROW_BUCKETS_ELECT:
                        event_name = "HashGrowBucketsElect";
index 176fbef149e4605571b221a0799881622809b15b..d74034f64f8214df1ea83cbfccf08d4b68bd24ec 100644 (file)
@@ -254,32 +254,31 @@ typedef struct ParallelHashJoinState
 } ParallelHashJoinState;
 
 /* The phases for building batches, used by build_barrier. */
-#define PHJ_BUILD_ELECT                                        0
-#define PHJ_BUILD_ALLOCATE                             1
-#define PHJ_BUILD_HASH_INNER                   2
-#define PHJ_BUILD_HASH_OUTER                   3
-#define PHJ_BUILD_RUN                                  4
-#define PHJ_BUILD_FREE                                 5
+#define PHJ_BUILD_ELECTING                             0
+#define PHJ_BUILD_ALLOCATING                   1
+#define PHJ_BUILD_HASHING_INNER                        2
+#define PHJ_BUILD_HASHING_OUTER                        3
+#define PHJ_BUILD_DONE                                 4
 
 /* The phases for probing each batch, used by for batch_barrier. */
-#define PHJ_BATCH_ELECT                                        0
-#define PHJ_BATCH_ALLOCATE                             1
-#define PHJ_BATCH_LOAD                                 2
-#define PHJ_BATCH_PROBE                                        3
-#define PHJ_BATCH_FREE                                 4
+#define PHJ_BATCH_ELECTING                             0
+#define PHJ_BATCH_ALLOCATING                   1
+#define PHJ_BATCH_LOADING                              2
+#define PHJ_BATCH_PROBING                              3
+#define PHJ_BATCH_DONE                                 4
 
 /* The phases of batch growth while hashing, for grow_batches_barrier. */
-#define PHJ_GROW_BATCHES_ELECT                 0
-#define PHJ_GROW_BATCHES_REALLOCATE            1
-#define PHJ_GROW_BATCHES_REPARTITION   2
-#define PHJ_GROW_BATCHES_DECIDE                        3
-#define PHJ_GROW_BATCHES_FINISH                        4
+#define PHJ_GROW_BATCHES_ELECTING              0
+#define PHJ_GROW_BATCHES_ALLOCATING            1
+#define PHJ_GROW_BATCHES_REPARTITIONING 2
+#define PHJ_GROW_BATCHES_DECIDING              3
+#define PHJ_GROW_BATCHES_FINISHING             4
 #define PHJ_GROW_BATCHES_PHASE(n)              ((n) % 5)       /* circular phases */
 
 /* The phases of bucket growth while hashing, for grow_buckets_barrier. */
-#define PHJ_GROW_BUCKETS_ELECT                 0
-#define PHJ_GROW_BUCKETS_REALLOCATE            1
-#define PHJ_GROW_BUCKETS_REINSERT              2
+#define PHJ_GROW_BUCKETS_ELECTING              0
+#define PHJ_GROW_BUCKETS_ALLOCATING            1
+#define PHJ_GROW_BUCKETS_REINSERTING   2
 #define PHJ_GROW_BUCKETS_PHASE(n)              ((n) % 3)       /* circular phases */
 
 typedef struct HashJoinTableData
index 371d3e911fd11eb202e2be4aee33145836b39370..be43c04802897d6b4f7f64c4383cb8d513dd88c1 100644 (file)
@@ -982,12 +982,12 @@ typedef enum
        WAIT_EVENT_HASH_BUILD_ELECT,
        WAIT_EVENT_HASH_BUILD_HASH_INNER,
        WAIT_EVENT_HASH_BUILD_HASH_OUTER,
-       WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE,
+       WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE,
        WAIT_EVENT_HASH_GROW_BATCHES_DECIDE,
        WAIT_EVENT_HASH_GROW_BATCHES_ELECT,
        WAIT_EVENT_HASH_GROW_BATCHES_FINISH,
        WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION,
-       WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE,
+       WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE,
        WAIT_EVENT_HASH_GROW_BUCKETS_ELECT,
        WAIT_EVENT_HASH_GROW_BUCKETS_REINSERT,
        WAIT_EVENT_LOGICAL_SYNC_DATA,