As connections that failed abort cleanup can't safely be further used,
if a remote query tries to get such a connection, we reject it.
Previously, this rejection involved dropping the connection if it was
open, without accounting for the possibility of open cursors using it,
causing a server crash when such an open cursor tried to use an
already-dropped connection, as a cursor-handling function
(create_cursor, fetch_more_data, or close_cursor) was called on a freed
PGconn. To fix, delay dropping failed connections until abort cleanup
of the main transaction, to ensure open cursors using such a connection
can safely refer to the PGconn for it.
Oversight in commit
8bf58c0d9.
Reported-by: Zhibai Song <songzhibai1234@gmail.com>
Diagnosed-by: Zhibai Song <songzhibai1234@gmail.com>
Author: Etsuro Fujita <etsuro.fujita@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Discussion: https://postgr.es/m/CAPmGK176y6JP017-Cn%2BhS9CEJx_6iVhRoYbAqzuLU4d8-XPPNg%40mail.gmail.com
Backpatch-through: 14
* Such connections can't safely be further used. Re-establishing the
* connection would change the snapshot and roll back any writes already
* performed, so that's not an option, either. Thus, we must abort.
+ *
+ * Note: there might be open cursors that use the connection, so even if the
+ * connection cache entry is marked as such, we will retain it until abort
+ * cleanup of the main transaction, to ensure such open cursors can safely
+ * refer to the PGconn for the connection.
*/
static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
if (entry->conn == NULL || !entry->changing_xact_state)
return;
- /* make sure this entry is inactive */
- disconnect_pg_server(entry);
-
/* find server name to be shown in the message below */
server = GetForeignServer(entry->serverid);
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_EXCEPTION),
- errmsg("connection to server \"%s\" was lost",
+ errmsg("connection to server \"%s\" cannot be used due to abort cleanup failure",
server->servername)));
}
-- Clean up
\set VERBOSITY default
RESET debug_discard_caches;
+-- ===================================================================
+-- test cleanup of failed connections on abort
+-- ===================================================================
+CREATE VIEW my_backend_pid (pid) AS SELECT pg_backend_pid();
+CREATE FOREIGN TABLE remote_backend_pid (pid int)
+ SERVER loopback OPTIONS (table_name 'my_backend_pid');
+CREATE FUNCTION wait_for_backend_termination(int) RETURNS void AS $$
+ BEGIN
+ WHILE (SELECT count(*) FROM pg_stat_activity WHERE pid = $1) > 0
+ LOOP
+ PERFORM pg_stat_clear_snapshot();
+ END LOOP;
+ END
+$$ LANGUAGE plpgsql;
+SET client_min_messages = 'ERROR';
+BEGIN;
+DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
+SAVEPOINT s;
+SELECT pid AS remote_pid FROM remote_backend_pid \gset
+SELECT pg_terminate_backend(:remote_pid);
+ pg_terminate_backend
+----------------------
+ t
+(1 row)
+
+SELECT wait_for_backend_termination(:remote_pid);
+ wait_for_backend_termination
+------------------------------
+
+(1 row)
+
+ROLLBACK TO SAVEPOINT s;
+SELECT pid FROM remote_backend_pid;
+ERROR: connection to server "loopback" cannot be used due to abort cleanup failure
+ROLLBACK TO SAVEPOINT s;
+RELEASE SAVEPOINT s;
+FETCH c;
+ERROR: no connection to the server
+CONTEXT: remote SQL command: DECLARE c1 CURSOR FOR
+SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
+ABORT;
+BEGIN;
+DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
+FETCH c;
+ c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
+----+----+-------------------+------------------------------+--------------------------+----+------------+-----
+ 1 | 2 | 00001_trig_update | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
+(1 row)
+
+SAVEPOINT s;
+SELECT pid AS remote_pid FROM remote_backend_pid \gset
+SELECT pg_terminate_backend(:remote_pid);
+ pg_terminate_backend
+----------------------
+ t
+(1 row)
+
+SELECT wait_for_backend_termination(:remote_pid);
+ wait_for_backend_termination
+------------------------------
+
+(1 row)
+
+ROLLBACK TO SAVEPOINT s;
+SELECT pid FROM remote_backend_pid;
+ERROR: connection to server "loopback" cannot be used due to abort cleanup failure
+ROLLBACK TO SAVEPOINT s;
+RELEASE SAVEPOINT s;
+CLOSE c;
+ERROR: no connection to the server
+CONTEXT: remote SQL command: CLOSE c1
+ABORT;
+RESET client_min_messages;
+DROP FUNCTION wait_for_backend_termination(int);
+DROP FOREIGN TABLE remote_backend_pid;
+DROP VIEW my_backend_pid;
-- Clean up
\set VERBOSITY default
RESET debug_discard_caches;
+
+-- ===================================================================
+-- test cleanup of failed connections on abort
+-- ===================================================================
+
+CREATE VIEW my_backend_pid (pid) AS SELECT pg_backend_pid();
+CREATE FOREIGN TABLE remote_backend_pid (pid int)
+ SERVER loopback OPTIONS (table_name 'my_backend_pid');
+CREATE FUNCTION wait_for_backend_termination(int) RETURNS void AS $$
+ BEGIN
+ WHILE (SELECT count(*) FROM pg_stat_activity WHERE pid = $1) > 0
+ LOOP
+ PERFORM pg_stat_clear_snapshot();
+ END LOOP;
+ END
+$$ LANGUAGE plpgsql;
+
+SET client_min_messages = 'ERROR';
+
+BEGIN;
+DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
+SAVEPOINT s;
+SELECT pid AS remote_pid FROM remote_backend_pid \gset
+SELECT pg_terminate_backend(:remote_pid);
+SELECT wait_for_backend_termination(:remote_pid);
+ROLLBACK TO SAVEPOINT s;
+SELECT pid FROM remote_backend_pid;
+ROLLBACK TO SAVEPOINT s;
+RELEASE SAVEPOINT s;
+FETCH c;
+ABORT;
+
+BEGIN;
+DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
+FETCH c;
+SAVEPOINT s;
+SELECT pid AS remote_pid FROM remote_backend_pid \gset
+SELECT pg_terminate_backend(:remote_pid);
+SELECT wait_for_backend_termination(:remote_pid);
+ROLLBACK TO SAVEPOINT s;
+SELECT pid FROM remote_backend_pid;
+ROLLBACK TO SAVEPOINT s;
+RELEASE SAVEPOINT s;
+CLOSE c;
+ABORT;
+
+RESET client_min_messages;
+
+DROP FUNCTION wait_for_backend_termination(int);
+DROP FOREIGN TABLE remote_backend_pid;
+DROP VIEW my_backend_pid;