From: Tobias Stoeckmann Date: Sun, 6 Apr 2025 22:24:13 +0000 (+0200) Subject: warc: Prevent signed integer overflow (#2568) X-Git-Tag: v3.8.0~49 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ef093729521fcf73fa4007d5ae77adfe4df42403;p=thirdparty%2Flibarchive.git warc: Prevent signed integer overflow (#2568) If a warc archive claims to have more than INT64_MAX - 4 content bytes, the inevitable failure to skip all these bytes could lead to parsing data which should be ignored instead. The test case contains a conversation entry with that many bytes and if the entry is not properly skipped, the warc implementation would read the conversation data as a new file entry. Signed-off-by: Tobias Stoeckmann --- diff --git a/Makefile.am b/Makefile.am index efc491800..f372cbcbd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -968,6 +968,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \ libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \ libarchive/test/test_read_format_warc.warc.uu \ + libarchive/test/test_read_format_warc_incomplete.warc.uu \ libarchive/test/test_read_format_xar_doublelink.xar.uu \ libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu \ libarchive/test/test_read_format_zip.zip.uu \ diff --git a/libarchive/archive_read_support_format_warc.c b/libarchive/archive_read_support_format_warc.c index fcec5bc4c..696f959c3 100644 --- a/libarchive/archive_read_support_format_warc.c +++ b/libarchive/archive_read_support_format_warc.c @@ -386,7 +386,8 @@ start_over: case LAST_WT: default: /* consume the content and start over */ - _warc_skip(a); + if (_warc_skip(a) < 0) + return (ARCHIVE_FATAL); goto start_over; } return (ARCHIVE_OK); @@ -439,7 +440,9 @@ _warc_skip(struct archive_read *a) { struct warc_s *w = a->format->data; - __archive_read_consume(a, w->cntlen + 4U/*\r\n\r\n separator*/); + if (__archive_read_consume(a, w->cntlen) < 0 || + __archive_read_consume(a, 4U/*\r\n\r\n separator*/) < 0) + return (ARCHIVE_FATAL); w->cntlen = 0U; w->cntoff = 0U; return (ARCHIVE_OK); diff --git a/libarchive/test/test_read_format_warc.c b/libarchive/test/test_read_format_warc.c index 91e6dc67e..745aabffa 100644 --- a/libarchive/test/test_read_format_warc.c +++ b/libarchive/test/test_read_format_warc.c @@ -78,3 +78,27 @@ DEFINE_TEST(test_read_format_warc) assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); } + +DEFINE_TEST(test_read_format_warc_incomplete) +{ + const char reffile[] = "test_read_format_warc_incomplete.warc"; + struct archive_entry *ae; + struct archive *a; + + extract_reference_file(reffile); + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_open_filename(a, reffile, 10240)); + + /* Entry cannot be parsed */ + assertEqualIntA(a, ARCHIVE_FATAL, archive_read_next_header(a, &ae)); + + /* Verify archive format. */ + assertEqualIntA(a, ARCHIVE_FILTER_NONE, archive_filter_code(a, 0)); + + /* Verify closing and resource freeing */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} diff --git a/libarchive/test/test_read_format_warc_incomplete.warc.uu b/libarchive/test/test_read_format_warc_incomplete.warc.uu new file mode 100644 index 000000000..b91b97ef3 --- /dev/null +++ b/libarchive/test/test_read_format_warc_incomplete.warc.uu @@ -0,0 +1,10 @@ +begin 644 test_read_format_warc_incomplete.warc +M5T%20R\Q+C`-"E=!4D,M5'EP93H@8V]N=F5R'0-"E=!4D,M1&%T +M93H@,C`R-2TP,RTS,%0Q-3HP,#HT,%H-"D-O;G1E;G0M5'EP93H@=&5X="]P +M;&%I;@T*0V]N=&5N="U,96YG=&@Z(#,X#0H-"E1H92!R96%D;64N='AT('-H +4;W5L9"!N;W0@8F4@=FES:6)L90H` +` +end