git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Harden astreamer tar parsing logic against archives it can't handle.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Apr 2026 16:20:26 +0000 (12:20 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Apr 2026 16:20:26 +0000 (12:20 -0400)
Previously, there was essentially no verification in this code that
the input is a tar file at all, let alone that it fits into the
subset of valid tar files that we can handle.  This was exposed by
the discovery that we couldn't handle files that FreeBSD's tar
makes, because it's fairly aggressive about converting sparse WAL
files into sparse tar entries.  To fix:

* Bail out if we find a pax extension header.  This covers the
sparse-file case, and also protects us against scenarios where
the pax header changes other file properties that we care about.
(Eventually we may extend the logic to actually handle such
headers, but that won't happen in time for v19.)

* Be more wary about tar file type codes in general: do not assume
that anything that's neither a directory nor a symlink must be a
regular file.  Instead, we just ignore entries that are none of the
three supported types.

* Apply pg_dump's isValidTarHeader to verify that a purported
header block is actually in tar format.  To make this possible,
move isValidTarHeader into src/port/tar.c, which is probably where
it should have been since that file was created.

I also took the opportunity to const-ify the arguments of
isValidTarHeader and tarChecksum, and to use symbols not hard-wired
constants inside tarChecksum.

Back-patch to v18 but not further.  Although this code exists inside
pg_basebackup in older branches, it's not really exposed in that
usage to tar files that weren't generated by our own code, so it
doesn't seem worth back-porting these changes across 3c9056981
and f80b09bac.  I did choose to include a back-patch of 5868372bb
into v18 though, to minimize cosmetic differences between these
two branches.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/3049460.1775067940@sss.pgh.pa.us
Backpatch-through: 18

src/bin/pg_basebackup/astreamer_inject.c
src/bin/pg_dump/pg_backup_archiver.c
src/bin/pg_dump/pg_backup_archiver.h
src/bin/pg_dump/pg_backup_tar.c
src/bin/pg_verifybackup/astreamer_verify.c
src/fe_utils/astreamer_file.c
src/fe_utils/astreamer_tar.c
src/include/fe_utils/astreamer.h
src/include/pgtar.h
src/port/tar.c

index 15334e458ad1e581be62d9a828659e8458f539a3..3fb3d4ecfd51082357d00dfb8d392878b0d824a5 100644 (file)
@@ -224,8 +224,9 @@ astreamer_inject_file(astreamer *streamer, char *pathname, char *data,
        strlcpy(member.pathname, pathname, MAXPGPATH);
        member.size = len;
        member.mode = pg_file_create_mode;
+       member.is_regular = true;
        member.is_directory = false;
-       member.is_link = false;
+       member.is_symlink = false;
        member.linktarget[0] = '\0';
 
        /*
index 8456992c33c065f5643135e377434d12f045ee3e..4293e20b20e05d7102b8a435ade96bde01887c1d 100644 (file)
@@ -42,6 +42,7 @@
 #include "pg_backup_archiver.h"
 #include "pg_backup_db.h"
 #include "pg_backup_utils.h"
+#include "pgtar.h"
 
 #define TEXT_DUMP_HEADER "--\n-- PostgreSQL database dump\n--\n\n"
 #define TEXT_DUMPALL_HEADER "--\n-- PostgreSQL database cluster dump\n--\n\n"
@@ -2349,7 +2350,7 @@ _discoverArchiveFormat(ArchiveHandle *AH)
                }
 
                if (!isValidTarHeader(AH->lookahead))
-                       pg_fatal("input file does not appear to be a valid archive");
+                       pg_fatal("input file does not appear to be a valid tar archive");
 
                AH->format = archTar;
        }
index 325b53fc9bd4bb0514259d467a666e0c93696142..c01d450697f9234fee6a822df6ac0d6237c80962 100644 (file)
@@ -464,8 +464,6 @@ extern void InitArchiveFmt_Null(ArchiveHandle *AH);
 extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
 extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
 
-extern bool isValidTarHeader(char *header);
-
 extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname);
 extern void IssueCommandPerBlob(ArchiveHandle *AH, TocEntry *te,
                                                                const char *cmdBegin, const char *cmdEnd);
index b5ba3b46dd999f5578caed6d2237388d2078c72c..ec42a2cb19d049465d944a8e3f2c8f3b28c7a7c7 100644 (file)
@@ -984,31 +984,6 @@ tarPrintf(TAR_MEMBER *th, const char *fmt,...)
        return (int) cnt;
 }
 
-bool
-isValidTarHeader(char *header)
-{
-       int                     sum;
-       int                     chk = tarChecksum(header);
-
-       sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8);
-
-       if (sum != chk)
-               return false;
-
-       /* POSIX tar format */
-       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 &&
-               memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0)
-               return true;
-       /* GNU tar format */
-       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar  \0", 8) == 0)
-               return true;
-       /* not-quite-POSIX format written by pre-9.3 pg_dump */
-       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0)
-               return true;
-
-       return false;
-}
-
 /* Given the member, write the TAR header & copy the file */
 static void
 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
index 33cf67670a70d7557473cb605aa1daa01749da51..ba49a0520e22f8ebec9c74a297ba9c62e218d446 100644 (file)
@@ -165,7 +165,7 @@ member_verify_header(astreamer *streamer, astreamer_member *member)
        char            pathname[MAXPGPATH];
 
        /* We are only interested in normal files. */
-       if (member->is_directory || member->is_link)
+       if (!member->is_regular)
                return;
 
        /*
index b6cbd343f99b82f6b4ce259595fdb7a25c45f777..e1b339ecc8bd8eeb48482e1c5895dcf305cddce6 100644 (file)
@@ -228,9 +228,13 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
                                mystreamer->filename[fnamelen - 1] = '\0';
 
                        /* Dispatch based on file type. */
-                       if (member->is_directory)
+                       if (member->is_regular)
+                               mystreamer->file =
+                                       create_file_for_extract(mystreamer->filename,
+                                                                                       member->mode);
+                       else if (member->is_directory)
                                extract_directory(mystreamer->filename, member->mode);
-                       else if (member->is_link)
+                       else if (member->is_symlink)
                        {
                                const char *linktarget = member->linktarget;
 
@@ -238,10 +242,6 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
                                        linktarget = mystreamer->link_map(linktarget);
                                extract_link(mystreamer->filename, linktarget);
                        }
-                       else
-                               mystreamer->file =
-                                       create_file_for_extract(mystreamer->filename,
-                                                                                       member->mode);
 
                        /* Report output file change. */
                        if (mystreamer->report_output_file)
index 8390c0b49f10e6fbcce936c5ad3dc46a6b347981..ba446553e12c1e00fd7843e99c04c372b1679d5e 100644 (file)
@@ -237,12 +237,16 @@ astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
 
                                /*
                                 * We've seen an end-of-archive indicator, so anything more is
-                                * buffered and sent as part of the archive trailer. But we
-                                * don't expect more than 2 blocks.
+                                * buffered and sent as part of the archive trailer.
+                                *
+                                * Per POSIX, the last physical block of a tar archive is
+                                * always full-sized, so there may be undefined data after the
+                                * two zero blocks that mark end-of-archive.  GNU tar, for
+                                * example, zero-pads to a 10kB boundary by default.  We just
+                                * buffer whatever we receive and pass it along at finalize
+                                * time.
                                 */
                                astreamer_buffer_bytes(streamer, &data, &len, len);
-                               if (len > 2 * TAR_BLOCK_SIZE)
-                                       pg_fatal("tar file trailer exceeds 2 blocks");
                                return;
 
                        default:
@@ -256,7 +260,8 @@ astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
  * Parse a file header within a tar stream.
  *
  * The return value is true if we found a file header and passed it on to the
- * next astreamer; it is false if we have reached the archive trailer.
+ * next astreamer; it is false if we have found the archive trailer.
+ * We throw error if we see invalid data.
  */
 static bool
 astreamer_tar_header(astreamer_tar_parser *mystreamer)
@@ -268,6 +273,9 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
 
        Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
 
+       /* Zero out fields of *member, just for consistency. */
+       memset(member, 0, sizeof(astreamer_member));
+
        /* Check whether we've got a block of all zero bytes. */
        for (i = 0; i < TAR_BLOCK_SIZE; ++i)
        {
@@ -285,6 +293,12 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
        if (!has_nonzero_byte)
                return false;
 
+       /*
+        * Verify that we have a reasonable-looking header.
+        */
+       if (!isValidTarHeader(buffer))
+               pg_fatal("input file does not appear to be a valid tar archive");
+
        /*
         * Parse key fields out of the header.
         */
@@ -295,12 +309,28 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
        member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
        member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
        member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
-       member->is_directory =
-               (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
-       member->is_link =
-               (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
-       if (member->is_link)
-               strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
+
+       switch (buffer[TAR_OFFSET_TYPEFLAG])
+       {
+               case TAR_FILETYPE_PLAIN:
+               case TAR_FILETYPE_PLAIN_OLD:
+                       member->is_regular = true;
+                       break;
+               case TAR_FILETYPE_DIRECTORY:
+                       member->is_directory = true;
+                       break;
+               case TAR_FILETYPE_SYMLINK:
+                       member->is_symlink = true;
+                       strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
+                       break;
+               case TAR_FILETYPE_PAX_EXTENDED:
+               case TAR_FILETYPE_PAX_EXTENDED_GLOBAL:
+                       pg_fatal("pax extensions to tar format are not supported");
+                       break;
+               default:
+                       /* For special filetypes, set none of the three is_xxx flags */
+                       break;
+       }
 
        /* Compute number of padding bytes. */
        mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
index 0e0031741fa515dca09c4f45ff0daffac3c0f6b0..005141ea8b3bff267e488f3c57135a59f060ca4c 100644 (file)
@@ -83,8 +83,10 @@ typedef struct
        mode_t          mode;
        uid_t           uid;
        gid_t           gid;
+       /* note: special filetypes will set none of these flags */
+       bool            is_regular;
        bool            is_directory;
-       bool            is_link;
+       bool            is_symlink;
        char            linktarget[MAXPGPATH];
 } astreamer_member;
 
index b2677578a3d1dca3e6583f4235d7fac7bec59e25..84c98dd3f6ecaef8ae2ea2ce554196855cd420ea 100644 (file)
@@ -55,11 +55,15 @@ enum tarHeaderOffset
        /* last 12 bytes of the 512-byte block are unassigned */
 };
 
+/* See POSIX (not all the standard file type codes are listed here) */
 enum tarFileType
 {
        TAR_FILETYPE_PLAIN = '0',
+       TAR_FILETYPE_PLAIN_OLD = '\0',  /* backwards compatibility, per POSIX */
        TAR_FILETYPE_SYMLINK = '2',
        TAR_FILETYPE_DIRECTORY = '5',
+       TAR_FILETYPE_PAX_EXTENDED = 'x',
+       TAR_FILETYPE_PAX_EXTENDED_GLOBAL = 'g',
 };
 
 extern enum tarError tarCreateHeader(char *h, const char *filename,
@@ -68,7 +72,8 @@ extern enum tarError tarCreateHeader(char *h, const char *filename,
                                                                         time_t mtime);
 extern uint64 read_tar_number(const char *s, int len);
 extern void print_tar_number(char *s, int len, uint64 val);
-extern int     tarChecksum(char *header);
+extern int     tarChecksum(const char *header);
+extern bool isValidTarHeader(const char *header);
 
 /*
  * Compute the number of padding bytes required for an entry in a tar
index 592b4fb7b0f4e08fb9c9efdee6439e54661aa600..fee9dbbf5be2d70e8f0ada2d82aeace301873909 100644 (file)
@@ -87,7 +87,7 @@ read_tar_number(const char *s, int len)
  * be 512 bytes, per the tar standard.
  */
 int
-tarChecksum(char *header)
+tarChecksum(const char *header)
 {
        int                     i,
                                sum;
@@ -95,15 +95,44 @@ tarChecksum(char *header)
        /*
         * Per POSIX, the checksum is the simple sum of all bytes in the header,
         * treating the bytes as unsigned, and treating the checksum field (at
-        * offset 148) as though it contained 8 spaces.
+        * offset TAR_OFFSET_CHECKSUM) as though it contained 8 spaces.
         */
        sum = 8 * ' ';                          /* presumed value for checksum field */
-       for (i = 0; i < 512; i++)
-               if (i < 148 || i >= 156)
+       for (i = 0; i < TAR_BLOCK_SIZE; i++)
+               if (i < TAR_OFFSET_CHECKSUM || i >= TAR_OFFSET_CHECKSUM + 8)
                        sum += 0xFF & header[i];
        return sum;
 }
 
+/*
+ * Check validity of a tar header (assumed to be 512 bytes long).
+ * We verify the checksum and the magic number / version.
+ */
+bool
+isValidTarHeader(const char *header)
+{
+       int                     sum;
+       int                     chk = tarChecksum(header);
+
+       sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8);
+
+       if (sum != chk)
+               return false;
+
+       /* POSIX tar format */
+       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 &&
+               memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0)
+               return true;
+       /* GNU tar format */
+       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar  \0", 8) == 0)
+               return true;
+       /* not-quite-POSIX format written by pre-9.3 pg_dump */
+       if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0)
+               return true;
+
+       return false;
+}
+
 
 /*
  * Fill in the buffer pointed to by h with a tar format header. This buffer