]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
tar: Improve LFS support on 32 bit systems (#2582)
authorTobias Stoeckmann <stoeckmann@users.noreply.github.com>
Sun, 13 Apr 2025 18:07:18 +0000 (20:07 +0200)
committerGitHub <noreply@github.com>
Sun, 13 Apr 2025 18:07:18 +0000 (11:07 -0700)
The size_t data type is only 32 bit on 32 bit systems while off_t is
generally 64 bit to support files larger than 2 GB.

If an entry is declared to be larger than 4 GB and the entry shall be
skipped, then 32 bit systems truncate the requested number of bytes.
This leads to a different interpretation of data in tar files compared
to 64 bit systems.

Signed-off-by: Tobias Stoeckmann <tobias@stoeckmann.org>
Makefile.am
libarchive/archive_read_support_format_tar.c
libarchive/test/CMakeLists.txt
libarchive/test/test_read_format_tar_pax_g_large.c [new file with mode: 0644]
libarchive/test/test_read_format_tar_pax_g_large.tar.uu [new file with mode: 0644]

index f372cbcbdd3daa646279c479861b0bc968cbde0a..ece9fca36aa22870cb3a7c142b45f0c99040f3ee 100644 (file)
@@ -530,6 +530,7 @@ libarchive_test_SOURCES= \
        libarchive/test/test_read_format_tar_empty_with_gnulabel.c \
        libarchive/test/test_read_format_tar_filename.c \
        libarchive/test/test_read_format_tar_invalid_pax_size.c \
+       libarchive/test/test_read_format_tar_pax_g_large.c \
        libarchive/test/test_read_format_tar_pax_large_attr.c \
        libarchive/test/test_read_format_tbz.c \
        libarchive/test/test_read_format_tgz.c \
@@ -963,6 +964,7 @@ libarchive_test_EXTRA_DIST=\
        libarchive/test/test_read_format_tar_empty_pax.tar.Z.uu \
        libarchive/test/test_read_format_tar_filename_koi8r.tar.Z.uu \
        libarchive/test/test_read_format_tar_invalid_pax_size.tar.uu \
+       libarchive/test/test_read_format_tar_pax_g_large.tar.uu \
        libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu \
        libarchive/test/test_read_format_ustar_filename_cp866.tar.Z.uu \
        libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \
index 7615d7cd78ed281a3b82d7349ec9aeb4aada7cb5..4ef21d443eac824790b5ba4e9512e79f4a5c929c 100644 (file)
@@ -169,36 +169,36 @@ static int        gnu_add_sparse_entry(struct archive_read *, struct tar *,
 
 static void    gnu_clear_sparse_list(struct tar *);
 static int     gnu_sparse_old_read(struct archive_read *, struct tar *,
-                   const struct archive_entry_header_gnutar *header, size_t *);
+                   const struct archive_entry_header_gnutar *header, int64_t *);
 static int     gnu_sparse_old_parse(struct archive_read *, struct tar *,
                    const struct gnu_sparse *sparse, int length);
 static int     gnu_sparse_01_parse(struct archive_read *, struct tar *,
                    const char *, size_t);
 static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *,
-                   size_t *);
+                   int64_t *);
 static int     header_Solaris_ACL(struct archive_read *,  struct tar *,
-                   struct archive_entry *, const void *, size_t *);
+                   struct archive_entry *, const void *, int64_t *);
 static int     header_common(struct archive_read *,  struct tar *,
                    struct archive_entry *, const void *);
 static int     header_old_tar(struct archive_read *, struct tar *,
                    struct archive_entry *, const void *);
 static int     header_pax_extension(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *, size_t *);
+                   struct archive_entry *, const void *, int64_t *);
 static int     header_pax_global(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *h, size_t *);
+                   struct archive_entry *, const void *h, int64_t *);
 static int     header_gnu_longlink(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *h, size_t *);
+                   struct archive_entry *, const void *h, int64_t *);
 static int     header_gnu_longname(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *h, size_t *);
+                   struct archive_entry *, const void *h, int64_t *);
 static int     is_mac_metadata_entry(struct archive_entry *entry);
 static int     read_mac_metadata_blob(struct archive_read *,
-                   struct archive_entry *, size_t *);
+                   struct archive_entry *, int64_t *);
 static int     header_volume(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *h, size_t *);
+                   struct archive_entry *, const void *h, int64_t *);
 static int     header_ustar(struct archive_read *, struct tar *,
                    struct archive_entry *, const void *h);
 static int     header_gnutar(struct archive_read *, struct tar *,
-                   struct archive_entry *, const void *h, size_t *);
+                   struct archive_entry *, const void *h, int64_t *);
 static int     archive_read_format_tar_bid(struct archive_read *, int);
 static int     archive_read_format_tar_options(struct archive_read *,
                    const char *, const char *);
@@ -211,7 +211,7 @@ static int  archive_read_format_tar_read_header(struct archive_read *,
 static int     checksum(struct archive_read *, const void *);
 static int     pax_attribute(struct archive_read *, struct tar *,
                    struct archive_entry *, const char *key, size_t key_length,
-                   size_t value_length, size_t *unconsumed);
+                   size_t value_length, int64_t *unconsumed);
 static int     pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
                    const char *, size_t, const char *, size_t);
 static int     pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
@@ -220,20 +220,20 @@ static int        pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
                    struct archive_entry *, const char *, size_t);
 static void    pax_time(const char *, size_t, int64_t *sec, long *nanos);
 static ssize_t readline(struct archive_read *, struct tar *, const char **,
-                   ssize_t limit, size_t *);
+                   ssize_t limit, int64_t *);
 static int     read_body_to_string(struct archive_read *, struct tar *,
-                   struct archive_string *, const void *h, size_t *);
+                   struct archive_string *, const void *h, int64_t *);
 static int     read_bytes_to_string(struct archive_read *,
-                   struct archive_string *, size_t, size_t *);
+                   struct archive_string *, size_t, int64_t *);
 static int64_t tar_atol(const char *, size_t);
 static int64_t tar_atol10(const char *, size_t);
 static int64_t tar_atol256(const char *, size_t);
 static int64_t tar_atol8(const char *, size_t);
 static int     tar_read_header(struct archive_read *, struct tar *,
-                   struct archive_entry *, size_t *);
+                   struct archive_entry *, int64_t *);
 static int     tohex(int c);
 static char    *url_decode(const char *, size_t);
-static void    tar_flush_unconsumed(struct archive_read *, size_t *);
+static void    tar_flush_unconsumed(struct archive_read *, int64_t *);
 
 /* Sanity limits:  These numbers should be low enough to
  * prevent a maliciously-crafted archive from forcing us to
@@ -478,7 +478,7 @@ archive_read_format_tar_options(struct archive_read *a,
  * anything outstanding since we're going to do read_aheads
  */
 static void
-tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
+tar_flush_unconsumed(struct archive_read *a, int64_t *unconsumed)
 {
        if (*unconsumed) {
 /*
@@ -526,7 +526,8 @@ archive_read_format_tar_read_header(struct archive_read *a,
        const char *p;
        const wchar_t *wp;
        int r;
-       size_t l, unconsumed = 0;
+       size_t l;
+       int64_t unconsumed = 0;
 
        /* Assign default device/inode values. */
        archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
@@ -706,7 +707,7 @@ archive_read_format_tar_skip(struct archive_read *a)
  */
 static int
 tar_read_header(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, size_t *unconsumed)
+    struct archive_entry *entry, int64_t *unconsumed)
 {
        ssize_t bytes;
        int err = ARCHIVE_OK, err2;
@@ -1043,7 +1044,7 @@ archive_block_is_null(const char *p)
  */
 static int
 header_Solaris_ACL(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        const struct archive_entry_header_ustar *header;
        struct archive_string    acl_text;
@@ -1149,7 +1150,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
  */
 static int
 header_gnu_longlink(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        int err;
 
@@ -1183,7 +1184,7 @@ set_conversion_failed_error(struct archive_read *a,
  */
 static int
 header_gnu_longname(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        int err;
        struct archive_string longname;
@@ -1204,7 +1205,7 @@ header_gnu_longname(struct archive_read *a, struct tar *tar,
  */
 static int
 header_volume(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        const struct archive_entry_header_ustar *header;
        int64_t size, to_consume;
@@ -1230,7 +1231,7 @@ header_volume(struct archive_read *a, struct tar *tar,
 static int
 read_bytes_to_string(struct archive_read *a,
                     struct archive_string *as, size_t size,
-                    size_t *unconsumed) {
+                    int64_t *unconsumed) {
        const void *src;
 
        /* Fail if we can't make our buffer big enough. */
@@ -1263,7 +1264,7 @@ read_bytes_to_string(struct archive_read *a,
  */
 static int
 read_body_to_string(struct archive_read *a, struct tar *tar,
-    struct archive_string *as, const void *h, size_t *unconsumed)
+    struct archive_string *as, const void *h, int64_t *unconsumed)
 {
        int64_t size;
        const struct archive_entry_header_ustar *header;
@@ -1654,7 +1655,7 @@ is_mac_metadata_entry(struct archive_entry *entry) {
  */
 static int
 read_mac_metadata_blob(struct archive_read *a,
-    struct archive_entry *entry, size_t *unconsumed)
+    struct archive_entry *entry, int64_t *unconsumed)
 {
        int64_t size;
        size_t msize;
@@ -1711,7 +1712,7 @@ read_mac_metadata_blob(struct archive_read *a,
  */
 static int
 header_pax_global(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        const struct archive_entry_header_ustar *header;
        int64_t size, to_consume;
@@ -1818,7 +1819,7 @@ header_ustar(struct archive_read *a, struct tar *tar,
 
 static int
 header_pax_extension(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        /* Sanity checks: The largest `x` body I've ever heard of was
         * a little over 4MB.  So I doubt there has ever been a
@@ -2211,7 +2212,7 @@ pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar,
 }
 
 static int
-pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) {
+pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, int64_t *unconsumed) {
        struct archive_string as;
        int r;
 
@@ -2240,7 +2241,7 @@ pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps
 static int
 pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) {
        struct archive_string as;
-       size_t unconsumed = 0;
+       int64_t unconsumed = 0;
        int r;
 
        if (value_length > 64) {
@@ -2280,7 +2281,7 @@ pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *
  */
 static int
 pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry,
-             const char *key, size_t key_length, size_t value_length, size_t *unconsumed)
+             const char *key, size_t key_length, size_t value_length, int64_t *unconsumed)
 {
        int64_t t;
        long n;
@@ -2877,7 +2878,7 @@ pax_time(const char *p, size_t length, int64_t *ps, long *pn)
  */
 static int
 header_gnutar(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
+    struct archive_entry *entry, const void *h, int64_t *unconsumed)
 {
        const struct archive_entry_header_gnutar *header;
        int64_t t;
@@ -3028,7 +3029,7 @@ gnu_clear_sparse_list(struct tar *tar)
 
 static int
 gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
-    const struct archive_entry_header_gnutar *header, size_t *unconsumed)
+    const struct archive_entry_header_gnutar *header, int64_t *unconsumed)
 {
        ssize_t bytes_read;
        const void *data;
@@ -3162,7 +3163,7 @@ gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size
  */
 static int64_t
 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
-    int64_t *remaining, size_t *unconsumed)
+    int64_t *remaining, int64_t *unconsumed)
 {
        int64_t l, limit, last_digit_limit;
        const char *p;
@@ -3208,7 +3209,7 @@ gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
  * that was read.
  */
 static ssize_t
-gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
+gnu_sparse_10_read(struct archive_read *a, struct tar *tar, int64_t *unconsumed)
 {
        ssize_t bytes_read;
        int entries;
@@ -3455,7 +3456,7 @@ tar_atol256(const char *_p, size_t char_cnt)
  */
 static ssize_t
 readline(struct archive_read *a, struct tar *tar, const char **start,
-    ssize_t limit, size_t *unconsumed)
+    ssize_t limit, int64_t *unconsumed)
 {
        ssize_t bytes_read;
        ssize_t total_size = 0;
index c6960189830092e00c16e5076346c7b208c4d3e8..991a6caee5b43aea800c8c884a1f92a883eaf01c 100644 (file)
@@ -172,6 +172,7 @@ IF(ENABLE_TEST)
     test_read_format_tar_empty_pax.c
     test_read_format_tar_filename.c
     test_read_format_tar_invalid_pax_size.c
+    test_read_format_tar_pax_g_large.c
     test_read_format_tar_pax_large_attr.c
     test_read_format_tbz.c
     test_read_format_tgz.c
diff --git a/libarchive/test/test_read_format_tar_pax_g_large.c b/libarchive/test/test_read_format_tar_pax_g_large.c
new file mode 100644 (file)
index 0000000..c4771e6
--- /dev/null
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2025 Tobias Stoeckmann
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+
+/*
+ * Read a pax formatted tar archive that has an extremely large
+ * (4 GB) global header. The pax reader should correctly skip the header and
+ * jump to (or past) end of file without encountering any further entry.
+ */
+DEFINE_TEST(test_read_format_tar_pax_g_large)
+{
+       char name[] = "test_read_format_tar_pax_g_large.tar";
+       struct archive_entry *ae;
+       struct archive *a;
+
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       extract_reference_file(name);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 10240));
+
+       /* Verify that no data entry is found. */
+       assertA(archive_read_next_header(a, &ae) != ARCHIVE_OK);
+
+       /* Verify that the format detection worked. */
+       assertEqualInt(ARCHIVE_FILTER_NONE, archive_filter_code(a, 0));
+       assertEqualInt(ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, archive_format(a));
+
+       assertEqualInt(ARCHIVE_OK, archive_read_close(a));
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+}
diff --git a/libarchive/test/test_read_format_tar_pax_g_large.tar.uu b/libarchive/test/test_read_format_tar_pax_g_large.tar.uu
new file mode 100644 (file)
index 0000000..c37d355
--- /dev/null
@@ -0,0 +1,49 @@
+begin 644 test_read_format_tar_pax_g_large.tar.uu
+M96UP='D`````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M`````````````#`P,#`V-#0`,#`P,3<U,``P,#`Q-S4P`#0P,#`P,#`P,#`P
+M`#$T-S<V-S0V,34T`#`Q,#<W-0`@9P``````````````````````````````
+M````````````````````````````````````````````````````````````
+M``````````````````````````````````````````!U<W1A<B`@`')O;W0`
+M````````````````````````````````````<F]O=```````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M``````````````````````!E;7!T>0``````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````,#`P,#8T-``P,#`Q-S4P`#`P
+M,#$W-3``,#`P,#`P,#`P,#``,30W-S8W-#8Q-30`,#$P-S`R`"`P````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M`````'5S=&%R("``<F]O=`````````````````````````````````````!R
+M;V]T````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+7````````````````````````````````
+`
+end