From: Amit Kapila Date: Wed, 24 Sep 2025 03:38:27 +0000 (+0000) Subject: Fix LOCK_TIMEOUT handling during parallel apply. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a54c7a1133a4338c09b435f7870152f305bbf3ad;p=thirdparty%2Fpostgresql.git Fix LOCK_TIMEOUT handling during parallel apply. Previously, the parallel apply worker used SIGINT to receive a graceful shutdown signal from the leader apply worker. However, SIGINT is also used by the LOCK_TIMEOUT handler to trigger a query-cancel interrupt. This overlap caused the parallel apply worker to miss LOCK_TIMEOUT signals, leading to incorrect behavior during lock wait/contention. This patch resolves the conflict by switching the graceful shutdown signal from SIGINT to SIGUSR2. Reported-by: Zane Duffield Diagnosed-by: Zhijie Hou Author: Hayato Kuroda Reviewed-by: Amit Kapila Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/CACMiCkXyC4au74kvE2g6Y=mCEF8X6r-Ne_ty4r7qWkUjRE4+oQ@mail.gmail.com --- diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index 6d4bd76bf81..3041c4a0107 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -98,9 +98,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS) * shut down and exit. * * Typically, this handler would be used for SIGTERM, but some processes use - * other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL - * writer and the logical replication parallel apply worker exits on either - * SIGINT or SIGTERM. + * other signals. In particular, the checkpointer and parallel apply worker + * exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM. * * ShutdownRequestPending should be checked at a convenient place within the * main loop, or else the main loop should call HandleMainLoopInterrupts. diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c index a24709ec7a5..4a2c67f9e40 100644 --- a/src/backend/replication/logical/applyparallelworker.c +++ b/src/backend/replication/logical/applyparallelworker.c @@ -872,10 +872,17 @@ ParallelApplyWorkerMain(Datum main_arg) InitializingApplyWorker = true; - /* Setup signal handling. */ + /* + * Setup signal handling. + * + * Note: We intentionally used SIGUSR2 to trigger a graceful shutdown + * initiated by the leader apply worker. This helps to differentiate it + * from the case where we abort the current transaction and exit on + * receiving SIGTERM. + */ pqsignal(SIGHUP, SignalHandlerForConfigReload); - pqsignal(SIGINT, SignalHandlerForShutdownRequest); pqsignal(SIGTERM, die); + pqsignal(SIGUSR2, SignalHandlerForShutdownRequest); BackgroundWorkerUnblockSignals(); /* @@ -974,9 +981,9 @@ ParallelApplyWorkerMain(Datum main_arg) /* * The parallel apply worker must not get here because the parallel apply - * worker will only stop when it receives a SIGTERM or SIGINT from the - * leader, or when there is an error. None of these cases will allow the - * code to reach here. + * worker will only stop when it receives a SIGTERM or SIGUSR2 from the + * leader, or SIGINT from itself, or when there is an error. None of these + * cases will allow the code to reach here. */ Assert(false); } diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 423d85fd40c..a49c6648cc2 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -624,7 +624,7 @@ logicalrep_worker_stop(Oid subid, Oid relid) /* * Stop the given logical replication parallel apply worker. * - * Node that the function sends SIGINT instead of SIGTERM to the parallel apply + * Node that the function sends SIGUSR2 instead of SIGTERM to the parallel apply * worker so that the worker exits cleanly. */ void @@ -662,7 +662,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo) * Only stop the worker if the generation matches and the worker is alive. */ if (worker->generation == generation && worker->proc) - logicalrep_worker_stop_internal(worker, SIGINT); + logicalrep_worker_stop_internal(worker, SIGUSR2); LWLockRelease(LogicalRepWorkerLock); }