From: Álvaro Herrera <alvherre@kurilemu.de>
Date: Tue, 19 May 2026 18:37:46 +0000 (-0700)
Subject: Fix REPACK decoding worker not cleaned up on FATAL exit
X-Git-Tag: REL_19_BETA1~71
X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=0160143ad9a686a7e705348da1f90e63a2a31536;p=thirdparty%2Fpostgresql.git

Fix REPACK decoding worker not cleaned up on FATAL exit

When the launching backend of REPACK (CONCURRENTLY) is terminated via
pg_terminate_backend(), ProcDiePending causes ereport(FATAL) which
bypasses PG_FINALLY blocks.  As a result, stop_repack_decoding_worker()
is never called, leaving the decoding worker running indefinitely and
holding its temporary replication slot.

Fix by using PG_ENSURE_ERROR_CLEANUP, which handles both ERROR and
FATAL exits.

Author: Baji Shaik <baji.pgdev@gmail.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Discussion: https://postgr.es/m/CA+fm-RNoPxL2N7db_A0anMXV_aDu6jWj4PNOPtMtBUAPDPvSXQ@mail.gmail.com
---

diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
index 351a3cc32e8..22a1307b38d 100644
--- a/src/backend/commands/repack.c
+++ b/src/backend/commands/repack.c
@@ -64,6 +64,7 @@
 #include "pgstat.h"
 #include "replication/logicalrelation.h"
 #include "storage/bufmgr.h"
+#include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
 #include "storage/proc.h"
@@ -211,6 +212,7 @@ static Oid	determine_clustered_index(Relation rel, bool usingindex,
 
 static void start_repack_decoding_worker(Oid relid);
 static void stop_repack_decoding_worker(void);
+static void stop_repack_decoding_worker_cb(int code, Datum arg);
 static Snapshot get_initial_snapshot(DecodingWorker *worker);
 
 static void ProcessRepackMessage(StringInfo msg);
@@ -666,27 +668,26 @@ cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid,
 	if (!concurrent)
 		TransferPredicateLocksToHeapRelation(OldHeap);
 
-	/* rebuild_relation does all the dirty work */
-	PG_TRY();
-	{
-		rebuild_relation(OldHeap, index, verbose, ident_idx);
-	}
-	PG_FINALLY();
+	/*
+	 * rebuild_relation does all the dirty work, and closes OldHeap and index,
+	 * if valid.
+	 *
+	 * In concurrent mode, make sure the worker terminates; normally it does
+	 * so by itself, but a PG_ENSURE_ERROR_CLEANUP callback ensures that this
+	 * happens even in case this backend dies early on a FATAL exit.  Normal
+	 * mode doesn't need that overhead.
+	 */
+	if (concurrent)
 	{
-		if (concurrent)
+		PG_ENSURE_ERROR_CLEANUP(stop_repack_decoding_worker_cb, 0);
 		{
-			/*
-			 * Since during normal operation the worker was already asked to
-			 * exit, stopping it explicitly is especially important on ERROR.
-			 * However it still seems a good practice to make sure that the
-			 * worker never survives the REPACK command.
-			 */
-			stop_repack_decoding_worker();
+			rebuild_relation(OldHeap, index, verbose, ident_idx);
 		}
+		PG_END_ENSURE_ERROR_CLEANUP(stop_repack_decoding_worker_cb, 0);
+		stop_repack_decoding_worker();
 	}
-	PG_END_TRY();
-
-	/* rebuild_relation closes OldHeap, and index if valid */
+	else
+		rebuild_relation(OldHeap, index, verbose, ident_idx);
 
 out:
 	/* Roll back any GUC changes executed by index functions */
@@ -3534,6 +3535,13 @@ stop_repack_decoding_worker(void)
 	decoding_worker = NULL;
 }
 
+/* stop_repack_decoding_worker, wrapped as a before_shmem_exit callback */
+static void
+stop_repack_decoding_worker_cb(int code, Datum arg)
+{
+	stop_repack_decoding_worker();
+}
+
 /*
  * Get the initial snapshot from the decoding worker.
  */