From: Álvaro Herrera Date: Tue, 19 May 2026 18:37:46 +0000 (-0700) Subject: Fix REPACK decoding worker not cleaned up on FATAL exit X-Git-Tag: REL_19_BETA1~71 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=0160143ad9a686a7e705348da1f90e63a2a31536;p=thirdparty%2Fpostgresql.git Fix REPACK decoding worker not cleaned up on FATAL exit When the launching backend of REPACK (CONCURRENTLY) is terminated via pg_terminate_backend(), ProcDiePending causes ereport(FATAL) which bypasses PG_FINALLY blocks. As a result, stop_repack_decoding_worker() is never called, leaving the decoding worker running indefinitely and holding its temporary replication slot. Fix by using PG_ENSURE_ERROR_CLEANUP, which handles both ERROR and FATAL exits. Author: Baji Shaik Reviewed-by: Sami Imseih Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/CA+fm-RNoPxL2N7db_A0anMXV_aDu6jWj4PNOPtMtBUAPDPvSXQ@mail.gmail.com --- diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c index 351a3cc32e8..22a1307b38d 100644 --- a/src/backend/commands/repack.c +++ b/src/backend/commands/repack.c @@ -64,6 +64,7 @@ #include "pgstat.h" #include "replication/logicalrelation.h" #include "storage/bufmgr.h" +#include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/proc.h" @@ -211,6 +212,7 @@ static Oid determine_clustered_index(Relation rel, bool usingindex, static void start_repack_decoding_worker(Oid relid); static void stop_repack_decoding_worker(void); +static void stop_repack_decoding_worker_cb(int code, Datum arg); static Snapshot get_initial_snapshot(DecodingWorker *worker); static void ProcessRepackMessage(StringInfo msg); @@ -666,27 +668,26 @@ cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid, if (!concurrent) TransferPredicateLocksToHeapRelation(OldHeap); - /* rebuild_relation does all the dirty work */ - PG_TRY(); - { - rebuild_relation(OldHeap, index, verbose, ident_idx); - } - PG_FINALLY(); + /* + * rebuild_relation does all the dirty work, and closes OldHeap and index, + * if valid. + * + * In concurrent mode, make sure the worker terminates; normally it does + * so by itself, but a PG_ENSURE_ERROR_CLEANUP callback ensures that this + * happens even in case this backend dies early on a FATAL exit. Normal + * mode doesn't need that overhead. + */ + if (concurrent) { - if (concurrent) + PG_ENSURE_ERROR_CLEANUP(stop_repack_decoding_worker_cb, 0); { - /* - * Since during normal operation the worker was already asked to - * exit, stopping it explicitly is especially important on ERROR. - * However it still seems a good practice to make sure that the - * worker never survives the REPACK command. - */ - stop_repack_decoding_worker(); + rebuild_relation(OldHeap, index, verbose, ident_idx); } + PG_END_ENSURE_ERROR_CLEANUP(stop_repack_decoding_worker_cb, 0); + stop_repack_decoding_worker(); } - PG_END_TRY(); - - /* rebuild_relation closes OldHeap, and index if valid */ + else + rebuild_relation(OldHeap, index, verbose, ident_idx); out: /* Roll back any GUC changes executed by index functions */ @@ -3534,6 +3535,13 @@ stop_repack_decoding_worker(void) decoding_worker = NULL; } +/* stop_repack_decoding_worker, wrapped as a before_shmem_exit callback */ +static void +stop_repack_decoding_worker_cb(int code, Datum arg) +{ + stop_repack_decoding_worker(); +} + /* * Get the initial snapshot from the decoding worker. */