git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
pg_verifybackup: Enable WAL parsing for tar-format backups
author Andrew Dunstan <andrew@dunslane.net>
Fri, 20 Mar 2026 19:31:35 +0000 (15:31 -0400)
committer Andrew Dunstan <andrew@dunslane.net>
Fri, 20 Mar 2026 19:31:35 +0000 (15:31 -0400)
Now that pg_waldump supports reading WAL from tar archives, remove the
restriction that forced --no-parse-wal for tar-format backups.

pg_verifybackup now automatically locates the WAL archive: it looks for
a separate pg_wal.tar first, then falls back to the main base.tar.  A
new --wal-path option (replacing the old --wal-directory, which is kept
as a silent alias) accepts either a directory or a tar archive path.

The default WAL directory preparation is deferred until the backup
format is known, since tar-format backups resolve the WAL path
differently from plain-format ones.

Author: Amul Sul <sulamul@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Euler Taveira <euler@eulerto.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
discussion: https://postgr.es/m/CAAJ_b94bqdWN3h2J-PzzzQ2Npbwct5ZQHggn_QoYGhC2rn-=WQ@mail.gmail.com

doc/src/sgml/ref/pg_verifybackup.sgml
src/bin/pg_verifybackup/pg_verifybackup.c
src/bin/pg_verifybackup/t/002_algorithm.pl
src/bin/pg_verifybackup/t/003_corruption.pl
src/bin/pg_verifybackup/t/007_wal.pl
src/bin/pg_verifybackup/t/008_untar.pl
src/bin/pg_verifybackup/t/010_client_untar.pl

index 61c12975e4ad5b065691697a135d87c5e4ca28ad..1695cfe91c8bd58d55d53a7b92ab24253ca88036 100644 (file)
@@ -36,10 +36,7 @@ PostgreSQL documentation
    <literal>backup_manifest</literal> generated by the server at the time
    of the backup. The backup may be stored either in the "plain" or the "tar"
    format; this includes tar-format backups compressed with any algorithm
-   supported by <application>pg_basebackup</application>. However, at present,
-   <literal>WAL</literal> verification is supported only for plain-format
-   backups. Therefore, if the backup is stored in tar-format, the
-   <literal>-n, --no-parse-wal</literal> option should be used.
+   supported by <application>pg_basebackup</application>.
   </para>
 
   <para>
@@ -261,12 +258,13 @@ PostgreSQL documentation
 
      <varlistentry>
       <term><option>-w <replaceable class="parameter">path</replaceable></option></term>
-      <term><option>--wal-directory=<replaceable class="parameter">path</replaceable></option></term>
+      <term><option>--wal-path=<replaceable class="parameter">path</replaceable></option></term>
       <listitem>
        <para>
-        Try to parse WAL files stored in the specified directory, rather than
-        in <literal>pg_wal</literal>. This may be useful if the backup is
-        stored in a separate location from the WAL archive.
+        Try to parse WAL files stored in the specified directory or tar
+        archive, rather than in <literal>pg_wal</literal>. This may be
+        useful if the backup is stored in a separate location from the WAL
+        archive.
        </para>
       </listitem>
      </varlistentry>
index 31f606c45b1b4125178c80071cc35147a739d9b5..b60ab8739d5f1ef258517806bd4ed63a8e0a08cd 100644 (file)
@@ -74,7 +74,9 @@ pg_noreturn static void report_manifest_error(JsonManifestParseContext *context,
                                                                                          const char *fmt,...)
                        pg_attribute_printf(2, 3);
 
-static void verify_tar_backup(verifier_context *context, DIR *dir);
+static void verify_tar_backup(verifier_context *context, DIR *dir,
+                                                         char **base_archive_path,
+                                                         char **wal_archive_path);
 static void verify_plain_backup_directory(verifier_context *context,
                                                                                  char *relpath, char *fullpath,
                                                                                  DIR *dir);
@@ -83,7 +85,9 @@ static void verify_plain_backup_file(verifier_context *context, char *relpath,
 static void verify_control_file(const char *controlpath,
                                                                uint64 manifest_system_identifier);
 static void precheck_tar_backup_file(verifier_context *context, char *relpath,
-                                                                        char *fullpath, SimplePtrList *tarfiles);
+                                                                        char *fullpath, SimplePtrList *tarfiles,
+                                                                        char **base_archive_path,
+                                                                        char **wal_archive_path);
 static void verify_tar_file(verifier_context *context, char *relpath,
                                                        char *fullpath, astreamer *streamer);
 static void report_extra_backup_files(verifier_context *context);
@@ -93,7 +97,7 @@ static void verify_file_checksum(verifier_context *context,
                                                                 uint8 *buffer);
 static void parse_required_wal(verifier_context *context,
                                                           char *pg_waldump_path,
-                                                          char *wal_directory);
+                                                          char *wal_path);
 static astreamer *create_archive_verifier(verifier_context *context,
                                                                                  char *archive_name,
                                                                                  Oid tblspc_oid,
@@ -126,7 +130,8 @@ main(int argc, char **argv)
                {"progress", no_argument, NULL, 'P'},
                {"quiet", no_argument, NULL, 'q'},
                {"skip-checksums", no_argument, NULL, 's'},
-               {"wal-directory", required_argument, NULL, 'w'},
+               {"wal-path", required_argument, NULL, 'w'},
+               {"wal-directory", required_argument, NULL, 'w'},        /* deprecated */
                {NULL, 0, NULL, 0}
        };
 
@@ -135,7 +140,9 @@ main(int argc, char **argv)
        char       *manifest_path = NULL;
        bool            no_parse_wal = false;
        bool            quiet = false;
-       char       *wal_directory = NULL;
+       char       *wal_path = NULL;
+       char       *base_archive_path = NULL;
+       char       *wal_archive_path = NULL;
        char       *pg_waldump_path = NULL;
        DIR                *dir;
 
@@ -221,8 +228,8 @@ main(int argc, char **argv)
                                context.skip_checksums = true;
                                break;
                        case 'w':
-                               wal_directory = pstrdup(optarg);
-                               canonicalize_path(wal_directory);
+                               wal_path = pstrdup(optarg);
+                               canonicalize_path(wal_path);
                                break;
                        default:
                                /* getopt_long already emitted a complaint */
@@ -285,10 +292,6 @@ main(int argc, char **argv)
                manifest_path = psprintf("%s/backup_manifest",
                                                                 context.backup_directory);
 
-       /* By default, look for the WAL in the backup directory, too. */
-       if (wal_directory == NULL)
-               wal_directory = psprintf("%s/pg_wal", context.backup_directory);
-
        /*
         * Try to read the manifest. We treat any errors encountered while parsing
         * the manifest as fatal; there doesn't seem to be much point in trying to
@@ -331,17 +334,6 @@ main(int argc, char **argv)
                pfree(path);
        }
 
-       /*
-        * XXX: In the future, we should consider enhancing pg_waldump to read WAL
-        * files from an archive.
-        */
-       if (!no_parse_wal && context.format == 't')
-       {
-               pg_log_error("pg_waldump cannot read tar files");
-               pg_log_error_hint("You must use -n/--no-parse-wal when verifying a tar-format backup.");
-               exit(1);
-       }
-
        /*
         * Perform the appropriate type of verification appropriate based on the
         * backup format. This will close 'dir'.
@@ -350,7 +342,7 @@ main(int argc, char **argv)
                verify_plain_backup_directory(&context, NULL, context.backup_directory,
                                                                          dir);
        else
-               verify_tar_backup(&context, dir);
+               verify_tar_backup(&context, dir, &base_archive_path, &wal_archive_path);
 
        /*
         * The "matched" flag should now be set on every entry in the hash table.
@@ -368,12 +360,35 @@ main(int argc, char **argv)
        if (context.format == 'p' && !context.skip_checksums)
                verify_backup_checksums(&context);
 
+       /*
+        * By default, WAL files are expected to be found in the backup directory
+        * for plain-format backups. In the case of tar-format backups, if a
+        * separate WAL archive is not found, the WAL files are most likely
+        * included within the main data directory archive.
+        */
+       if (wal_path == NULL)
+       {
+               if (context.format == 'p')
+                       wal_path = psprintf("%s/pg_wal", context.backup_directory);
+               else if (wal_archive_path)
+                       wal_path = wal_archive_path;
+               else if (base_archive_path)
+                       wal_path = base_archive_path;
+               else
+               {
+                       pg_log_error("WAL archive not found");
+                       pg_log_error_hint("Specify the correct path using the option -w/--wal-path.  "
+                                                         "Or you must use -n/--no-parse-wal when verifying a tar-format backup.");
+                       exit(1);
+               }
+       }
+
        /*
         * Try to parse the required ranges of WAL records, unless we were told
         * not to do so.
         */
        if (!no_parse_wal)
-               parse_required_wal(&context, pg_waldump_path, wal_directory);
+               parse_required_wal(&context, pg_waldump_path, wal_path);
 
        /*
         * If everything looks OK, tell the user this, unless we were asked to
@@ -787,7 +802,8 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
  * close when we're done with it.
  */
 static void
-verify_tar_backup(verifier_context *context, DIR *dir)
+verify_tar_backup(verifier_context *context, DIR *dir, char **base_archive_path,
+                                 char **wal_archive_path)
 {
        struct dirent *dirent;
        SimplePtrList tarfiles = {NULL, NULL};
@@ -816,7 +832,8 @@ verify_tar_backup(verifier_context *context, DIR *dir)
                        char       *fullpath;
 
                        fullpath = psprintf("%s/%s", context->backup_directory, filename);
-                       precheck_tar_backup_file(context, filename, fullpath, &tarfiles);
+                       precheck_tar_backup_file(context, filename, fullpath, &tarfiles,
+                                                                        base_archive_path, wal_archive_path);
                        pfree(fullpath);
                }
        }
@@ -875,17 +892,21 @@ verify_tar_backup(verifier_context *context, DIR *dir)
  *
  * The arguments to this function are mostly the same as the
  * verify_plain_backup_file. The additional argument outputs a list of valid
- * tar files.
+ * tar files, along with the full paths to the main archive and the WAL
+ * directory archive.
  */
 static void
 precheck_tar_backup_file(verifier_context *context, char *relpath,
-                                                char *fullpath, SimplePtrList *tarfiles)
+                                                char *fullpath, SimplePtrList *tarfiles,
+                                                char **base_archive_path, char **wal_archive_path)
 {
        struct stat sb;
        Oid                     tblspc_oid = InvalidOid;
        pg_compress_algorithm compress_algorithm;
        tar_file   *tar;
        char       *suffix = NULL;
+       bool            is_base_archive = false;
+       bool            is_wal_archive = false;
 
        /* Should be tar format backup */
        Assert(context->format == 't');
@@ -918,9 +939,15 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
         * extension such as .gz, .lz4, or .zst.
         */
        if (strncmp("base", relpath, 4) == 0)
+       {
                suffix = relpath + 4;
+               is_base_archive = true;
+       }
        else if (strncmp("pg_wal", relpath, 6) == 0)
+       {
                suffix = relpath + 6;
+               is_wal_archive = true;
+       }
        else
        {
                /* Expected a <tablespaceoid>.tar file here. */
@@ -953,8 +980,13 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
         * Ignore WALs, as reading and verification will be handled through
         * pg_waldump.
         */
-       if (strncmp("pg_wal", relpath, 6) == 0)
+       if (is_wal_archive)
+       {
+               *wal_archive_path = pstrdup(fullpath);
                return;
+       }
+       else if (is_base_archive)
+               *base_archive_path = pstrdup(fullpath);
 
        /*
         * Append the information to the list for complete verification at a later
@@ -1188,7 +1220,7 @@ verify_file_checksum(verifier_context *context, manifest_file *m,
  */
 static void
 parse_required_wal(verifier_context *context, char *pg_waldump_path,
-                                  char *wal_directory)
+                                  char *wal_path)
 {
        manifest_data *manifest = context->manifest;
        manifest_wal_range *this_wal_range = manifest->first_wal_range;
@@ -1198,7 +1230,7 @@ parse_required_wal(verifier_context *context, char *pg_waldump_path,
                char       *pg_waldump_cmd;
 
                pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n",
-                                                                 pg_waldump_path, wal_directory, this_wal_range->tli,
+                                                                 pg_waldump_path, wal_path, this_wal_range->tli,
                                                                  LSN_FORMAT_ARGS(this_wal_range->start_lsn),
                                                                  LSN_FORMAT_ARGS(this_wal_range->end_lsn));
                fflush(NULL);
@@ -1366,7 +1398,7 @@ usage(void)
        printf(_("  -P, --progress              show progress information\n"));
        printf(_("  -q, --quiet                 do not print any output, except for errors\n"));
        printf(_("  -s, --skip-checksums        skip checksum verification\n"));
-       printf(_("  -w, --wal-directory=PATH    use specified path for WAL files\n"));
+       printf(_("  -w, --wal-path=PATH         use specified path for WAL files\n"));
        printf(_("  -V, --version               output version information, then exit\n"));
        printf(_("  -?, --help                  show this help, then exit\n"));
        printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
index 0556191ec9d760570e44e39f31b891dd252e67eb..edc515d5904e2f087d4cfb80e73c947552f110c9 100644 (file)
@@ -30,10 +30,6 @@ sub test_checksums
        {
                # Add switch to get a tar-format backup
                push @backup, ('--format' => 'tar');
-
-               # Add switch to skip WAL verification, which is not yet supported for
-               # tar-format backups
-               push @verify, ('--no-parse-wal');
        }
 
        # A backup with a bogus algorithm should fail.
index b1d65b8aa0fa340b052eeb17b3277effa468ab39..882d75d9dc2a204f272527d4c5343aafed2a832d 100644 (file)
@@ -193,10 +193,8 @@ for my $scenario (@scenario)
                        command_ok([ $tar, '-cf' => "$tar_backup_path/base.tar", '.' ]);
                        chdir($cwd) || die "chdir: $!";
 
-                       # Now check that the backup no longer verifies. We must use -n
-                       # here, because pg_waldump can't yet read WAL from a tarfile.
                        command_fails_like(
-                               [ 'pg_verifybackup', '--no-parse-wal', $tar_backup_path ],
+                               [ 'pg_verifybackup', $tar_backup_path ],
                                $scenario->{'fails_like'},
                                "corrupt backup fails verification: $name");
 
index 79087a1f6be3a7022e12f4c07d7db6a70a40e937..0e0377bfaccaa8e1f02914a78dc2a391af470155 100644 (file)
@@ -42,10 +42,10 @@ command_ok([ 'pg_verifybackup', '--no-parse-wal', $backup_path ],
 command_ok(
        [
                'pg_verifybackup',
-               '--wal-directory' => $relocated_pg_wal,
+               '--wal-path' => $relocated_pg_wal,
                $backup_path
        ],
-       '--wal-directory can be used to specify WAL directory');
+       '--wal-path can be used to specify WAL directory');
 
 # Move directory back to original location.
 rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!";
@@ -90,4 +90,20 @@ command_ok(
        [ 'pg_verifybackup', $backup_path2 ],
        'valid base backup with timeline > 1');
 
+# Test WAL verification for a tar-format backup with a separate pg_wal.tar,
+# as produced by pg_basebackup --format=tar --wal-method=stream.
+my $backup_path3 = $primary->backup_dir . '/test_tar_wal';
+$primary->command_ok(
+       [
+               'pg_basebackup',
+               '--pgdata' => $backup_path3,
+               '--no-sync',
+               '--format' => 'tar',
+               '--checkpoint' => 'fast'
+       ],
+       "tar backup with separate pg_wal.tar");
+command_ok(
+       [ 'pg_verifybackup', $backup_path3 ],
+       'WAL verification succeeds with separate pg_wal.tar');
+
 done_testing();
index ae67ae85a316a955440f18a4efafa9d62de1550a..161c08c190d1f45e393141858a6308cbc21b48d6 100644 (file)
@@ -47,7 +47,6 @@ my $tsoid = $primary->safe_psql(
                SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'));
 
 my $backup_path = $primary->backup_dir . '/server-backup';
-my $extract_path = $primary->backup_dir . '/extracted-backup';
 
 my @test_configuration = (
        {
@@ -123,14 +122,12 @@ for my $tc (@test_configuration)
                # Verify tar backup.
                $primary->command_ok(
                        [
-                               'pg_verifybackup', '--no-parse-wal',
-                               '--exit-on-error', $backup_path,
+                               'pg_verifybackup', '--exit-on-error', $backup_path,
                        ],
                        "verify backup, compression $method");
 
                # Cleanup.
                rmtree($backup_path);
-               rmtree($extract_path);
        }
 }
 
index 1ac7b5db75aa997a8fc0ef296ab4d3c0a03223fb..9670fbe4fda339fb19edf52d0f76d4976fa04a8d 100644 (file)
@@ -32,7 +32,6 @@ print $jf $junk_data;
 close $jf;
 
 my $backup_path = $primary->backup_dir . '/client-backup';
-my $extract_path = $primary->backup_dir . '/extracted-backup';
 
 my @test_configuration = (
        {
@@ -137,13 +136,11 @@ for my $tc (@test_configuration)
                # Verify tar backup.
                $primary->command_ok(
                        [
-                               'pg_verifybackup', '--no-parse-wal',
-                               '--exit-on-error', $backup_path,
+                               'pg_verifybackup', '--exit-on-error', $backup_path,
                        ],
                        "verify backup, compression $method");
 
                # Cleanup.
-               rmtree($extract_path);
                rmtree($backup_path);
        }
 }