From 403d82dd54b58f9c1c64f7f7b00f396ce903a257 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 31 Jan 2023 12:47:20 +0900 Subject: [PATCH] Remove recovery test 011_crash_recovery.pl This test has been added as of 857ee8e that has introduced the SQL function txid_status(), with the purpose of checking that a transaction ID still in-progress during a crash is correctly marked as aborted after recovery finishes. This test is unstable, and some configuration scenarios may make that easier to reproduce (wal_level=minimal, wal_compression=on) because the WAL holding the information about the in-progress transaction ID may not have made it to disk yet, hence a post-crash recovery may cause the same XID to be reused, triggering a test failure. We have discussed a few approaches, like making this function force a WAL flush to make it reliable across crashes, but we also don't want to pay a performance penalty in some scenarios. The test could have been tweaked to enforce a checkpoint but that actually breaks the promise of the test to rely on a stable result of txid_status() after a crash. This issue has been reported a few times across the past years, with an original report from Kyotaro Horiguchi. The buildfarm machines tanager, hachi and gokiburi enable wal_compression, and fail on this test periodically. 
Discussion: https://postgr.es/m/3163112.1674762209@sss.pgh.pa.us Discussion: https://postgr.es/m/20210305.115011.558061052471425531.horikyota.ntt@gmail.com Backpatch-through: 11 --- src/test/recovery/t/011_crash_recovery.pl | 68 ----------------------- 1 file changed, 68 deletions(-) delete mode 100644 src/test/recovery/t/011_crash_recovery.pl diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl deleted file mode 100644 index 5dc52412cad..00000000000 --- a/src/test/recovery/t/011_crash_recovery.pl +++ /dev/null @@ -1,68 +0,0 @@ -# -# Tests relating to PostgreSQL crash recovery and redo -# -use strict; -use warnings; -use PostgresNode; -use TestLib; -use Test::More; -use Config; -if ($Config{osname} eq 'MSWin32') -{ - - # some Windows Perls at least don't like IPC::Run's start/kill_kill regime. - plan skip_all => "Test fails on Windows perl"; -} -else -{ - plan tests => 3; -} - -my $node = get_new_node('master'); -$node->init(allows_streaming => 1); -$node->start; - -my ($stdin, $stdout, $stderr) = ('', '', ''); - -# Ensure that txid_status reports 'aborted' for xacts -# that were in-progress during crash. To do that, we need -# an xact to be in-progress when we crash and we need to know -# its xid. 
-my $tx = IPC::Run::start( - [ - 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', - $node->connstr('postgres') - ], - '<', - \$stdin, - '>', - \$stdout, - '2>', - \$stderr); -$stdin .= q[ -BEGIN; -CREATE TABLE mine(x integer); -SELECT txid_current(); -]; -$tx->pump until $stdout =~ /[[:digit:]]+[\r\n]$/; - -# Status should be in-progress -my $xid = $stdout; -chomp($xid); - -is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]), - 'in progress', 'own xid is in-progres'); - -# Crash and restart the postmaster -$node->stop('immediate'); -$node->start; - -# Make sure we really got a new xid -cmp_ok($node->safe_psql('postgres', 'SELECT txid_current()'), - '>', $xid, 'new xid after restart is greater'); - -# and make sure we show the in-progress xact as aborted -is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]), - 'aborted', 'xid is aborted after crash'); - -$tx->kill_kill; -- 2.39.5