git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
warc: Prevent signed integer overflow (#2568)
author: Tobias Stoeckmann <stoeckmann@users.noreply.github.com>
Sun, 6 Apr 2025 22:24:13 +0000 (00:24 +0200)
committer: GitHub <noreply@github.com>
Sun, 6 Apr 2025 22:24:13 +0000 (15:24 -0700)
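If a warc archive claims to have more than INT64_MAX - 4 content bytes,
adding the 4-byte separator to the content length overflows a signed
integer. The inevitable failure to skip all these bytes was not checked,
which could lead to parsing data which should be ignored instead.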

The test case contains a conversion entry with that many bytes and if
the entry is not properly skipped, the warc implementation would read
the conversion data as a new file entry.

Signed-off-by: Tobias Stoeckmann <tobias@stoeckmann.org>
Makefile.am
libarchive/archive_read_support_format_warc.c
libarchive/test/test_read_format_warc.c
libarchive/test/test_read_format_warc_incomplete.warc.uu [new file with mode: 0644]

index efc49180044c6081e32bbd1c946208f3996183bd..f372cbcbdd3daa646279c479861b0bc968cbde0a 100644 (file)
@@ -968,6 +968,7 @@ libarchive_test_EXTRA_DIST=\
        libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \
        libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \
        libarchive/test/test_read_format_warc.warc.uu \
+       libarchive/test/test_read_format_warc_incomplete.warc.uu \
        libarchive/test/test_read_format_xar_doublelink.xar.uu \
        libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu \
        libarchive/test/test_read_format_zip.zip.uu \
index fcec5bc4cbb9cb0db0a5294f05964345053d63eb..696f959c341dc9c9c317ad6103c5a0f71862a137 100644 (file)
@@ -386,7 +386,8 @@ start_over:
        case LAST_WT:
        default:
                /* consume the content and start over */
-               _warc_skip(a);
+               if (_warc_skip(a) < 0)
+                       return (ARCHIVE_FATAL);
                goto start_over;
        }
        return (ARCHIVE_OK);
@@ -439,7 +440,9 @@ _warc_skip(struct archive_read *a)
 {
        struct warc_s *w = a->format->data;
 
-       __archive_read_consume(a, w->cntlen + 4U/*\r\n\r\n separator*/);
+       if (__archive_read_consume(a, w->cntlen) < 0 ||
+           __archive_read_consume(a, 4U/*\r\n\r\n separator*/) < 0)
+               return (ARCHIVE_FATAL);
        w->cntlen = 0U;
        w->cntoff = 0U;
        return (ARCHIVE_OK);
index 91e6dc67e133bb0cf8ab5f8157e9a12c7a3d2b83..745aabffa93ba9907b6e07922a8ffd82377c09e6 100644 (file)
@@ -78,3 +78,27 @@ DEFINE_TEST(test_read_format_warc)
        assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
        assertEqualInt(ARCHIVE_OK, archive_read_free(a));
 }
+
+DEFINE_TEST(test_read_format_warc_incomplete)
+{
+       const char reffile[] = "test_read_format_warc_incomplete.warc";
+       struct archive_entry *ae;
+       struct archive *a;
+
+       extract_reference_file(reffile);
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_read_open_filename(a, reffile, 10240));
+
+       /* Entry cannot be parsed */
+       assertEqualIntA(a, ARCHIVE_FATAL, archive_read_next_header(a, &ae));
+
+       /* Verify archive format. */
+       assertEqualIntA(a, ARCHIVE_FILTER_NONE, archive_filter_code(a, 0));
+
+       /* Verify closing and resource freeing */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+}
diff --git a/libarchive/test/test_read_format_warc_incomplete.warc.uu b/libarchive/test/test_read_format_warc_incomplete.warc.uu
new file mode 100644 (file)
index 0000000..b91b97e
--- /dev/null
@@ -0,0 +1,10 @@
+begin 644 test_read_format_warc_incomplete.warc
+M5T%20R\Q+C`-"E=!4D,M5'EP93H@8V]N=F5R<VEO;@T*5T%20RU$871E.B`R
+M,#(U+3`S+3,P5#$U.C`P.C0P6@T*0V]N=&5N="U,96YG=&@Z(#DR,C,S-S(P
+M,S8X-30W-S4X,#<-"@T*5T%20R\Q+C`-"E=!4D,M5'EP93H@<F5S;W5R8V4-
+M"E=!4D,M5&%R9V5T+55223H@9FEL93HO+W)E861M92YT>'0-"E=!4D,M1&%T
+M93H@,C`R-2TP,RTS,%0Q-3HP,#HT,%H-"D-O;G1E;G0M5'EP93H@=&5X="]P
+M;&%I;@T*0V]N=&5N="U,96YG=&@Z(#,X#0H-"E1H92!R96%D;64N='AT('-H
+4;W5L9"!N;W0@8F4@=FES:6)L90H`
+`
+end