From: Leslie P. Polzer Date: Mon, 22 Dec 2025 04:08:26 +0000 (+0000) Subject: Expand OSS-Fuzz integration: 1 → 25 fuzz targets X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5e88593708ac1e74ba9ae7576ac557a930d497e5;p=thirdparty%2Flibarchive.git Expand OSS-Fuzz integration: 1 → 25 fuzz targets Add comprehensive fuzzing coverage for libarchive: Format-specific fuzzers (13): - tar, zip, 7zip, rar, rar5, xar, cab, lha, iso9660, cpio, warc, mtree, ar Security-critical fuzzers (4): - encryption: encrypted archive handling - write_disk: extraction path traversal - read_disk: filesystem traversal, symlinks - entry: ACL functions (previously 0% coverage) API fuzzers (7): - write: archive creation - linkify: hardlink detection (complexity 775, was 0%) - match: inclusion/exclusion patterns - string: encoding conversions (UTF-8, wide chars) - seek: seekable archive operations - roundtrip: write-then-read consistency - filter: compression/decompression Supporting files: - 14 dictionaries with format-specific magic bytes - 9 options files for complex fuzzers - Updated build script with seed corpora generation Targets previously uncovered functions: - archive_entry_linkify (complexity 775) - ACL functions (complexity 705-713) - xar_read_header (was 10.11% coverage) Expected coverage improvement: 74% → 85-95% --- diff --git a/contrib/oss-fuzz/libarchive_7zip_fuzzer.cc b/contrib/oss-fuzz/libarchive_7zip_fuzzer.cc new file mode 100644 index 000000000..b46e17d1f --- /dev/null +++ b/contrib/oss-fuzz/libarchive_7zip_fuzzer.cc @@ -0,0 +1,75 @@ +/* + * 7-Zip format specific fuzzer for libarchive + * Targets 7-Zip parsing and decompression code paths + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; // 512KB + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + 
Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + // Enable 7-Zip format specifically + archive_read_support_format_7zip(a); + // Enable all filters for 7z internal compression + archive_read_support_filter_all(a); + + // Set passphrase for encrypted archives + archive_read_add_passphrase(a, "password"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + // Exercise entry metadata access + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_is_encrypted(entry); + archive_entry_is_data_encrypted(entry); + archive_entry_is_metadata_encrypted(entry); + + // Read data + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_7zip_fuzzer.dict b/contrib/oss-fuzz/libarchive_7zip_fuzzer.dict new file mode 100644 index 000000000..3bee01a3b --- /dev/null +++ b/contrib/oss-fuzz/libarchive_7zip_fuzzer.dict @@ -0,0 +1,47 @@ +# 7-Zip format dictionary +# Magic bytes +"7z\xbc\xaf\x27\x1c" +"\x37\x7a\xbc\xaf\x27\x1c" + +# Common property IDs +"\x00" +"\x01" +"\x02" +"\x03" +"\x04" +"\x05" +"\x06" +"\x07" +"\x08" +"\x09" +"\x0a" +"\x0b" +"\x0c" +"\x0d" +"\x0e" +"\x0f" +"\x10" +"\x11" +"\x17" +"\x19" +"\x21" +"\x23" +"\x24" +"\x25" + +# Compression method IDs +"\x00\x00" +"\x00\x03" +"\x00\x04" +"\x00\x06" 
+"\x01\x01" +"\x03\x01\x01" +"\x04\x01\x08" +"\x04\x02\x02" +"\x21\x01" +"\x30\x01\x01" + +# Encryption +"\x06\xf1\x07\x01" +"Password" +"password" diff --git a/contrib/oss-fuzz/libarchive_7zip_fuzzer.options b/contrib/oss-fuzz/libarchive_7zip_fuzzer.options new file mode 100644 index 000000000..d2d9f0ed2 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_7zip_fuzzer.options @@ -0,0 +1,10 @@ +[libfuzzer] +max_len = 524288 +timeout = 60 +rss_limit_mb = 2048 + +[honggfuzz] +timeout = 60 + +[afl] +timeout = 60 diff --git a/contrib/oss-fuzz/libarchive_ar_fuzzer.cc b/contrib/oss-fuzz/libarchive_ar_fuzzer.cc new file mode 100644 index 000000000..0d7c20911 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_ar_fuzzer.cc @@ -0,0 +1,66 @@ +/* + * AR (Unix archive) format fuzzer for libarchive + * Tests BSD and GNU ar formats + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_ar(a); + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + 
archive_entry_gid(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_ar_fuzzer.dict b/contrib/oss-fuzz/libarchive_ar_fuzzer.dict new file mode 100644 index 000000000..4f2e3db1b --- /dev/null +++ b/contrib/oss-fuzz/libarchive_ar_fuzzer.dict @@ -0,0 +1,20 @@ +# AR format dictionary + +# AR magic +"!<arch>\x0a" + +# File header terminator +"\x60\x0a" + +# Special entries +"/" +"//" +"/SYM64/" + +# Common permissions +"100644 " +"100755 " + +# UID/GID fields +"0 " +"1000 " diff --git a/contrib/oss-fuzz/libarchive_cab_fuzzer.cc b/contrib/oss-fuzz/libarchive_cab_fuzzer.cc new file mode 100644 index 000000000..0b62eccbb --- /dev/null +++ b/contrib/oss-fuzz/libarchive_cab_fuzzer.cc @@ -0,0 +1,63 @@ +/* + * CAB (Microsoft Cabinet) format fuzzer for libarchive + */ +#include <stddef.h> +#include <stdint.h> +#include <vector> + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast<Buffer *>(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_cab(a); + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector<uint8_t> data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_size(entry); +
archive_entry_mtime(entry); + archive_entry_mode(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_cab_fuzzer.dict b/contrib/oss-fuzz/libarchive_cab_fuzzer.dict new file mode 100644 index 000000000..76e1d3bcd --- /dev/null +++ b/contrib/oss-fuzz/libarchive_cab_fuzzer.dict @@ -0,0 +1,23 @@ +# CAB (Microsoft Cabinet) format dictionary + +# CAB signature +"MSCF" +"\x4d\x53\x43\x46" + +# Version +"\x03\x01" + +# Compression types +"\x00\x00" +"\x01\x00" +"\x02\x00" +"\x03\x00" + +# Folder count patterns +"\x01\x00" +"\x02\x00" + +# Header flags +"\x00\x00" +"\x01\x00" +"\x04\x00" diff --git a/contrib/oss-fuzz/libarchive_cpio_fuzzer.cc b/contrib/oss-fuzz/libarchive_cpio_fuzzer.cc new file mode 100644 index 000000000..06fe4217a --- /dev/null +++ b/contrib/oss-fuzz/libarchive_cpio_fuzzer.cc @@ -0,0 +1,70 @@ +/* + * CPIO format fuzzer for libarchive + * Tests all CPIO variants: binary, odc, newc, crc + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_cpio(a); + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct 
archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + archive_entry_gid(entry); + archive_entry_ino(entry); + archive_entry_nlink(entry); + archive_entry_rdev(entry); + archive_entry_hardlink(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_cpio_fuzzer.dict b/contrib/oss-fuzz/libarchive_cpio_fuzzer.dict new file mode 100644 index 000000000..b7ceeee1c --- /dev/null +++ b/contrib/oss-fuzz/libarchive_cpio_fuzzer.dict @@ -0,0 +1,25 @@ +# CPIO format dictionary + +# Binary magic (little endian) +"\xc7\x71" + +# Binary magic (big endian) +"\x71\xc7" + +# ASCII odc magic +"070707" + +# ASCII newc magic +"070701" + +# ASCII crc magic +"070702" + +# Common trailer +"TRAILER!!!" 
+ +# Common field patterns +"00000000" +"00000001" +"000001ed" +"000003e8" diff --git a/contrib/oss-fuzz/libarchive_encryption_fuzzer.cc b/contrib/oss-fuzz/libarchive_encryption_fuzzer.cc new file mode 100644 index 000000000..798b28db7 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_encryption_fuzzer.cc @@ -0,0 +1,113 @@ +/* + * Encrypted archive fuzzer for libarchive + * Tests password/passphrase handling across formats + */ +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +// Passphrase callback for testing +static const char *test_passphrases[] = { + "password", + "test", + "123456", + "", + "secret", + NULL +}; + +static int passphrase_idx = 0; + +static const char* passphrase_callback(struct archive *a, void *client_data) { + (void)a; + (void)client_data; + const char *pass = test_passphrases[passphrase_idx]; + if (pass != NULL) { + passphrase_idx++; + } + return pass; +} + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + // Reset passphrase index + passphrase_idx = 0; + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + // Enable all formats that support encryption + archive_read_support_format_zip(a); + archive_read_support_format_7zip(a); + archive_read_support_format_rar(a); + archive_read_support_format_rar5(a); + archive_read_support_filter_all(a); + + // Set up passphrase callback + archive_read_set_passphrase_callback(a, NULL, passphrase_callback); + + // Also add some static passphrases + archive_read_add_passphrase(a, "password"); + 
archive_read_add_passphrase(a, "test123"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + int entry_count = 0; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK && entry_count < 100) { + archive_entry_pathname(entry); + + // Check encryption status + int is_encrypted = archive_entry_is_encrypted(entry); + int is_data_encrypted = archive_entry_is_data_encrypted(entry); + int is_meta_encrypted = archive_entry_is_metadata_encrypted(entry); + (void)is_encrypted; + (void)is_data_encrypted; + (void)is_meta_encrypted; + + // Check if archive has encrypted entries + archive_read_has_encrypted_entries(a); + + // Try to read data (may fail due to wrong password) + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + + entry_count++; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_encryption_fuzzer.options b/contrib/oss-fuzz/libarchive_encryption_fuzzer.options new file mode 100644 index 000000000..d2d9f0ed2 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_encryption_fuzzer.options @@ -0,0 +1,10 @@ +[libfuzzer] +max_len = 524288 +timeout = 60 +rss_limit_mb = 2048 + +[honggfuzz] +timeout = 60 + +[afl] +timeout = 60 diff --git a/contrib/oss-fuzz/libarchive_entry_fuzzer.cc b/contrib/oss-fuzz/libarchive_entry_fuzzer.cc new file mode 100644 index 000000000..7a6c1860e --- /dev/null +++ b/contrib/oss-fuzz/libarchive_entry_fuzzer.cc @@ -0,0 +1,159 @@ +/* + * Archive entry fuzzer for libarchive + * Targets archive_entry_* functions including ACL, linkify, and metadata + */ +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 64 * 1024; // 64KB + +// FuzzedDataProvider-like helper for consuming bytes +class DataConsumer { +public: + 
DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) { + memset(string_buf_, 0, sizeof(string_buf_)); + } + + bool empty() const { return pos_ >= size_; } + + uint8_t consume_byte() { + if (pos_ >= size_) return 0; + return data_[pos_++]; + } + + uint32_t consume_uint32() { + uint32_t val = 0; + for (int i = 0; i < 4 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + int64_t consume_int64() { + int64_t val = 0; + for (int i = 0; i < 8 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + const char* consume_string(size_t max_len) { + if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1; + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? avail : max_len; + + // Copy to internal buffer and null-terminate + size_t actual_len = 0; + while (actual_len < len && pos_ < size_) { + char c = static_cast(data_[pos_++]); + if (c == '\0') break; + string_buf_[actual_len++] = c; + } + string_buf_[actual_len] = '\0'; + return string_buf_; + } + + size_t remaining() const { return size_ - pos_; } + +private: + const uint8_t *data_; + size_t size_; + size_t pos_; + char string_buf_[512]; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + DataConsumer consumer(buf, len); + + struct archive_entry *entry = archive_entry_new(); + if (entry == NULL) { + return 0; + } + + // Set basic entry properties + archive_entry_set_pathname(entry, consumer.consume_string(256)); + archive_entry_set_size(entry, consumer.consume_int64()); + archive_entry_set_mode(entry, consumer.consume_uint32()); + archive_entry_set_uid(entry, consumer.consume_uint32()); + archive_entry_set_gid(entry, consumer.consume_uint32()); + archive_entry_set_mtime(entry, consumer.consume_int64(), 0); + archive_entry_set_atime(entry, consumer.consume_int64(), 0); + 
archive_entry_set_ctime(entry, consumer.consume_int64(), 0); + archive_entry_set_birthtime(entry, consumer.consume_int64(), 0); + + // Set various string fields + archive_entry_set_uname(entry, consumer.consume_string(64)); + archive_entry_set_gname(entry, consumer.consume_string(64)); + archive_entry_set_symlink(entry, consumer.consume_string(256)); + archive_entry_set_hardlink(entry, consumer.consume_string(256)); + + // Exercise ACL functions (low coverage targets). Type and tag must be real + // ARCHIVE_ENTRY_ACL_* values: arbitrary small integers are rejected by + // archive_entry_acl_add_entry(), which would leave the ACL permanently empty. + static const int kAclTypes[] = { + ARCHIVE_ENTRY_ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, ARCHIVE_ENTRY_ACL_TYPE_ALLOW, + ARCHIVE_ENTRY_ACL_TYPE_DENY, ARCHIVE_ENTRY_ACL_TYPE_AUDIT, ARCHIVE_ENTRY_ACL_TYPE_ALARM}; + static const int kAclTags[] = { + ARCHIVE_ENTRY_ACL_USER, ARCHIVE_ENTRY_ACL_USER_OBJ, ARCHIVE_ENTRY_ACL_GROUP, ARCHIVE_ENTRY_ACL_GROUP_OBJ, + ARCHIVE_ENTRY_ACL_MASK, ARCHIVE_ENTRY_ACL_OTHER, ARCHIVE_ENTRY_ACL_EVERYONE}; + // Always add one entry, then keep adding while enough input remains + do { + int acl_type = kAclTypes[consumer.consume_byte() % 6]; + int acl_permset = consumer.consume_uint32(); + int acl_tag = kAclTags[consumer.consume_byte() % 7]; + int acl_qual = consumer.consume_uint32(); + const char *acl_name = consumer.consume_string(64); + archive_entry_acl_add_entry(entry, acl_type, acl_permset, acl_tag, acl_qual, acl_name); + } while (!consumer.empty() && consumer.remaining() > 10); + + // Exercise ACL text conversion functions (archive_acl_to_text_* are uncovered) + ssize_t text_len; + char *acl_text = archive_entry_acl_to_text(entry, &text_len, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + if (acl_text) { + // Parse the text back + archive_entry_acl_from_text(entry, acl_text, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + free(acl_text); + } + + acl_text = archive_entry_acl_to_text(entry, &text_len, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); + if (acl_text) { + free(acl_text); + } + + acl_text = archive_entry_acl_to_text(entry, &text_len, ARCHIVE_ENTRY_ACL_TYPE_NFS4); + if (acl_text) { + free(acl_text); + } + + // Exercise wide character versions + wchar_t *acl_text_w = archive_entry_acl_to_text_w(entry, &text_len, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + if (acl_text_w) { +
free(acl_text_w); + } + + // Get pathname variants + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_pathname_utf8(entry); + + // Clone the entry + struct archive_entry *entry2 = archive_entry_clone(entry); + if (entry2) { + archive_entry_free(entry2); + } + + // Clear and reuse + archive_entry_clear(entry); + + archive_entry_free(entry); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_filter_fuzzer.cc b/contrib/oss-fuzz/libarchive_filter_fuzzer.cc new file mode 100644 index 000000000..cba2b3de6 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_filter_fuzzer.cc @@ -0,0 +1,78 @@ +/* + * Compression filter fuzzer for libarchive + * Tests decompression of gzip, bzip2, xz, lzma, zstd, lz4, etc. + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 256 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; + size_t pos; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf + buffer->pos; + ssize_t len = buffer->len - buffer->pos; + buffer->pos = buffer->len; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + // Enable raw format (just decompress, no archive format) + archive_read_support_format_raw(a); + + // Enable all compression filters + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len, 0}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(8192, 0); + struct archive_entry *entry; + + if (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + // Get filter info + int filter_count = archive_filter_count(a); + for (int i = 0; i 
< filter_count; i++) { + archive_filter_name(a, i); + archive_filter_code(a, i); + archive_filter_bytes(a, i); + } + + // Read all decompressed data + ssize_t total = 0; + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) { + total += r; + // Limit total decompressed size to prevent zip bombs + if (total > 10 * 1024 * 1024) { + break; + } + } + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_filter_fuzzer.dict b/contrib/oss-fuzz/libarchive_filter_fuzzer.dict new file mode 100644 index 000000000..2f780c9f4 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_filter_fuzzer.dict @@ -0,0 +1,33 @@ +# Compression filter dictionary + +# GZIP magic +"\x1f\x8b" +"\x1f\x8b\x08" + +# BZIP2 magic +"BZh" +"BZ0" + +# XZ magic +"\xfd7zXZ\x00" + +# LZMA magic +"\x5d\x00\x00" + +# ZSTD magic +"\x28\xb5\x2f\xfd" + +# LZ4 magic +"\x04\x22\x4d\x18" + +# Compress (.Z) magic +"\x1f\x9d" + +# LZIP magic +"LZIP" + +# LRZIP magic +"LRZI" + +# LZO magic +"\x89LZO\x00\x0d\x0a\x1a\x0a" diff --git a/contrib/oss-fuzz/libarchive_filter_fuzzer.options b/contrib/oss-fuzz/libarchive_filter_fuzzer.options new file mode 100644 index 000000000..5a0374b31 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_filter_fuzzer.options @@ -0,0 +1,10 @@ +[libfuzzer] +max_len = 262144 +timeout = 30 +rss_limit_mb = 2048 + +[honggfuzz] +timeout = 30 + +[afl] +timeout = 30 diff --git a/contrib/oss-fuzz/libarchive_fuzzer.dict b/contrib/oss-fuzz/libarchive_fuzzer.dict new file mode 100644 index 000000000..390b68567 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_fuzzer.dict @@ -0,0 +1,76 @@ +# General libarchive dictionary covering multiple formats + +# TAR magic +"ustar" +"ustar\x00" +"ustar \x00" +"\x00\x00" + +# ZIP magic +"PK\x03\x04" +"PK\x05\x06" +"PK\x01\x02" +"PK\x07\x08" + +# 7z magic +"7z\xbc\xaf\x27\x1c" + +# RAR magic +"Rar!\x1a\x07\x00" +"Rar!\x1a\x07\x01\x00" + +# XAR magic +"xar!" 
+ +# CPIO magic +"\xc7\x71" +"070701" +"070702" +"070707" + +# CAB magic +"MSCF" + +# LHA magic +"-lh" +"-lz" + +# AR magic +"!<arch>\x0a" + +# ISO9660 magic +"CD001" + +# GZIP magic +"\x1f\x8b" + +# BZIP2 magic +"BZ" +"BZh" + +# XZ magic +"\xfd7zXZ\x00" + +# LZMA magic +"\x5d\x00\x00" + +# ZSTD magic +"\x28\xb5\x2f\xfd" + +# LZ4 magic +"\x04\x22\x4d\x18" + +# Common paths +"/" +"./" +"../" +"./test" +"test.txt" +"test/" + +# Common attributes +"\x00\x00\x00\x00" +"\xff\xff\xff\xff" + +# Passphrase +"password" diff --git a/contrib/oss-fuzz/libarchive_fuzzer.options b/contrib/oss-fuzz/libarchive_fuzzer.options new file mode 100644 index 000000000..7b1139e29 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_fuzzer.options @@ -0,0 +1,9 @@ +[libfuzzer] +max_len = 524288 +timeout = 30 + +[honggfuzz] +timeout = 30 + +[afl] +timeout = 30 diff --git a/contrib/oss-fuzz/libarchive_iso9660_fuzzer.cc b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.cc new file mode 100644 index 000000000..9aa8316fe --- /dev/null +++ b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.cc @@ -0,0 +1,70 @@ +/* + * ISO9660 format fuzzer for libarchive + * Tests ISO, Joliet, and Rock Ridge extensions + */ +#include <stddef.h> +#include <stdint.h> +#include <vector> + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 1024 * 1024; // 1MB for ISO images + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast<Buffer *>(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_iso9660(a); + archive_read_support_filter_all(a); + + // Set options to test various ISO extensions +
archive_read_set_options(a, "iso9660:joliet,iso9660:rockridge"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_symlink(entry); + archive_entry_hardlink(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_iso9660_fuzzer.dict b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.dict new file mode 100644 index 000000000..6dac3eaa5 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.dict @@ -0,0 +1,36 @@ +# ISO9660 format dictionary + +# Volume descriptor type +"\x00" +"\x01" +"\x02" +"\xff" + +# Standard identifier +"CD001" + +# Volume descriptor version +"\x01" + +# Joliet escape sequences +"%/@" +"%/C" +"%/E" + +# Rock Ridge signatures +"SP" +"RR" +"CE" +"PX" +"PN" +"SL" +"NM" +"CL" +"PL" +"RE" +"TF" +"SF" + +# System use +"ER" +"ES" diff --git a/contrib/oss-fuzz/libarchive_iso9660_fuzzer.options b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.options new file mode 100644 index 000000000..f04ee45fd --- /dev/null +++ b/contrib/oss-fuzz/libarchive_iso9660_fuzzer.options @@ -0,0 +1,10 @@ +[libfuzzer] +max_len = 1048576 +timeout = 60 +rss_limit_mb = 2048 + +[honggfuzz] +timeout = 60 + +[afl] +timeout = 60 diff --git a/contrib/oss-fuzz/libarchive_lha_fuzzer.cc b/contrib/oss-fuzz/libarchive_lha_fuzzer.cc new file mode 100644 index 000000000..58732aead --- /dev/null +++ b/contrib/oss-fuzz/libarchive_lha_fuzzer.cc @@ -0,0 +1,66 @@ +/* + * LHA/LZH format fuzzer for libarchive + */ +#include +#include +#include + +#include 
"archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_lha(a); + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + archive_entry_gid(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_lha_fuzzer.dict b/contrib/oss-fuzz/libarchive_lha_fuzzer.dict new file mode 100644 index 000000000..38ca18406 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_lha_fuzzer.dict @@ -0,0 +1,26 @@ +# LHA/LZH format dictionary + +# Compression methods +"-lh0-" +"-lh1-" +"-lh2-" +"-lh3-" +"-lh4-" +"-lh5-" +"-lh6-" +"-lh7-" +"-lhd-" +"-lzs-" +"-lz4-" +"-lz5-" + +# OS type +"\x00" +"\x4d" +"\x55" + +# Header levels +"\x00" +"\x01" +"\x02" +"\x03" diff --git a/contrib/oss-fuzz/libarchive_linkify_fuzzer.cc b/contrib/oss-fuzz/libarchive_linkify_fuzzer.cc new file mode 100644 index 000000000..46348dac7 --- /dev/null +++ 
b/contrib/oss-fuzz/libarchive_linkify_fuzzer.cc @@ -0,0 +1,161 @@ +/* + * Archive entry link resolver fuzzer for libarchive + * Targets archive_entry_linkify (complexity: 775, zero coverage) + */ +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 64 * 1024; // 64KB + +// Simple data consumer +class DataConsumer { +public: + DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) { + memset(string_buf_, 0, sizeof(string_buf_)); + } + + bool empty() const { return pos_ >= size_; } + + uint8_t consume_byte() { + if (pos_ >= size_) return 0; + return data_[pos_++]; + } + + uint32_t consume_uint32() { + uint32_t val = 0; + for (int i = 0; i < 4 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + int64_t consume_int64() { + int64_t val = 0; + for (int i = 0; i < 8 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + const char* consume_string(size_t max_len) { + if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1; + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? 
avail : max_len; + + size_t actual_len = 0; + while (actual_len < len && pos_ < size_) { + char c = static_cast(data_[pos_++]); + if (c == '\0') break; + string_buf_[actual_len++] = c; + } + string_buf_[actual_len] = '\0'; + return string_buf_; + } + + size_t remaining() const { return size_ - pos_; } + +private: + const uint8_t *data_; + size_t size_; + size_t pos_; + char string_buf_[256]; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + DataConsumer consumer(buf, len); + + // Create a link resolver + struct archive_entry_linkresolver *resolver = archive_entry_linkresolver_new(); + if (resolver == NULL) { + return 0; + } + + // Set the format strategy based on input + uint8_t strategy = consumer.consume_byte() % 5; + int format; + switch (strategy) { + case 0: format = ARCHIVE_FORMAT_TAR_GNUTAR; break; + case 1: format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; break; + case 2: format = ARCHIVE_FORMAT_CPIO_POSIX; break; + case 3: format = ARCHIVE_FORMAT_CPIO_SVR4_NOCRC; break; + default: format = ARCHIVE_FORMAT_TAR_USTAR; break; + } + archive_entry_linkresolver_set_strategy(resolver, format); + + // Create multiple entries to test linkify with hardlinks + struct archive_entry *entries[32]; + int num_entries = 0; + + while (!consumer.empty() && num_entries < 32 && consumer.remaining() > 20) { + struct archive_entry *entry = archive_entry_new(); + if (entry == NULL) break; + + // Set pathname + archive_entry_set_pathname(entry, consumer.consume_string(64)); + + // Set inode and device for hardlink detection + archive_entry_set_ino(entry, consumer.consume_int64()); + archive_entry_set_dev(entry, consumer.consume_uint32()); + archive_entry_set_nlink(entry, (consumer.consume_byte() % 5) + 1); + + // Set mode (regular file or directory) + uint8_t ftype = consumer.consume_byte() % 2; + mode_t mode = ftype ? 
(S_IFDIR | 0755) : (S_IFREG | 0644); + archive_entry_set_mode(entry, mode); + + archive_entry_set_size(entry, consumer.consume_int64() & 0xFFFF); + archive_entry_set_uid(entry, consumer.consume_uint32() & 0xFFFF); + archive_entry_set_gid(entry, consumer.consume_uint32() & 0xFFFF); + + entries[num_entries++] = entry; + } + + // Run every entry through the resolver. Whatever linkify hands back via + // entry/spare is ours to free; an entry it keeps (entry set to NULL) is + // owned by the resolver until a later call returns it. + for (int i = 0; i < num_entries; i++) { + struct archive_entry *entry = entries[i]; + struct archive_entry *spare = NULL; + entries[i] = NULL; // ownership moves to linkify's in/out parameters + + // This is the main function we want to fuzz (zero coverage) + archive_entry_linkify(resolver, &entry, &spare); + + if (entry != NULL) { + archive_entry_free(entry); + } + if (spare != NULL) { + archive_entry_free(spare); + } + } + + // Drain entries the resolver deferred: passing a NULL entry asks for + // the next one, and a NULL result means nothing is left. Each drained + // entry is freed here exactly once. + struct archive_entry *entry = NULL; + struct archive_entry *spare = NULL; + while (1) { + archive_entry_linkify(resolver, &entry, &spare); + if (entry == NULL) + break; + archive_entry_free(entry); + entry = NULL; + if (spare != NULL) { + archive_entry_free(spare); + spare = NULL; + } + } + + // entries[] holds only NULLs by now; freeing through it would double free. + archive_entry_linkresolver_free(resolver); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_match_fuzzer.cc b/contrib/oss-fuzz/libarchive_match_fuzzer.cc new file mode 100644 index 000000000..c5431b34b --- /dev/null +++ b/contrib/oss-fuzz/libarchive_match_fuzzer.cc @@ -0,0 +1,139 @@ +/* + * Archive match fuzzer for libarchive + * Tests pattern matching, time matching, and owner matching + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 32 * 1024; + +class DataConsumer { +public: + DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) { + memset(string_buf_, 0, sizeof(string_buf_)); + } + + bool
empty() const { return pos_ >= size_; } + + uint8_t consume_byte() { + if (pos_ >= size_) return 0; + return data_[pos_++]; + } + + int64_t consume_int64() { + int64_t val = 0; + for (int i = 0; i < 8 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + const char* consume_string(size_t max_len) { + if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1; + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? avail : max_len; + + size_t actual_len = 0; + while (actual_len < len && pos_ < size_) { + char c = static_cast(data_[pos_++]); + if (c == '\0') break; + string_buf_[actual_len++] = c; + } + string_buf_[actual_len] = '\0'; + return string_buf_; + } + + size_t remaining() const { return size_ - pos_; } + +private: + const uint8_t *data_; + size_t size_; + size_t pos_; + char string_buf_[256]; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + DataConsumer consumer(buf, len); + + struct archive *match = archive_match_new(); + if (match == NULL) { + return 0; + } + + // Add various match patterns + while (!consumer.empty() && consumer.remaining() > 5) { + uint8_t match_type = consumer.consume_byte() % 6; + + switch (match_type) { + case 0: { + // Pattern exclusion + const char *pattern = consumer.consume_string(64); + archive_match_exclude_pattern(match, pattern); + break; + } + case 1: { + // Pattern inclusion + const char *pattern = consumer.consume_string(64); + archive_match_include_pattern(match, pattern); + break; + } + case 2: { + // Time comparison (newer than) + int64_t sec = consumer.consume_int64(); + int64_t nsec = consumer.consume_int64() % 1000000000; + archive_match_include_time(match, ARCHIVE_MATCH_MTIME | ARCHIVE_MATCH_NEWER, + sec, nsec); + break; + } + case 3: { + // Time comparison (older than) + int64_t sec = consumer.consume_int64(); + int64_t nsec = consumer.consume_int64() % 
1000000000; + archive_match_include_time(match, ARCHIVE_MATCH_MTIME | ARCHIVE_MATCH_OLDER, + sec, nsec); + break; + } + case 4: { + // UID inclusion + int64_t uid = consumer.consume_int64() & 0xFFFF; + archive_match_include_uid(match, uid); + break; + } + case 5: { + // GID inclusion + int64_t gid = consumer.consume_int64() & 0xFFFF; + archive_match_include_gid(match, gid); + break; + } + } + } + + // Create a test entry and check if it matches + struct archive_entry *entry = archive_entry_new(); + if (entry) { + archive_entry_set_pathname(entry, "test/file.txt"); + archive_entry_set_mtime(entry, 1234567890, 0); + archive_entry_set_uid(entry, 1000); + archive_entry_set_gid(entry, 1000); + archive_entry_set_mode(entry, 0644 | 0100000); // Regular file + + // Test matching + archive_match_path_excluded(match, entry); + archive_match_time_excluded(match, entry); + archive_match_owner_excluded(match, entry); + archive_match_excluded(match, entry); + + archive_entry_free(entry); + } + + archive_match_free(match); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_mtree_fuzzer.cc b/contrib/oss-fuzz/libarchive_mtree_fuzzer.cc new file mode 100644 index 000000000..e0c39249c --- /dev/null +++ b/contrib/oss-fuzz/libarchive_mtree_fuzzer.cc @@ -0,0 +1,73 @@ +/* + * MTREE format fuzzer for libarchive + * Tests mtree manifest parsing + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 256 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 
0; + } + + archive_read_support_format_mtree(a); + archive_read_support_filter_all(a); + + // Enable checkfs option to test more code paths + archive_read_set_options(a, "mtree:checkfs"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + archive_entry_gid(entry); + archive_entry_uname(entry); + archive_entry_gname(entry); + archive_entry_symlink(entry); + archive_entry_fflags_text(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_mtree_fuzzer.dict b/contrib/oss-fuzz/libarchive_mtree_fuzzer.dict new file mode 100644 index 000000000..7241ea5d2 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_mtree_fuzzer.dict @@ -0,0 +1,47 @@ +# MTREE format dictionary + +# Keywords +"/set" +"/unset" +".." 
+ +# File types +"type=file" +"type=dir" +"type=link" +"type=block" +"type=char" +"type=fifo" +"type=socket" + +# Attributes +"mode=" +"uid=" +"gid=" +"uname=" +"gname=" +"size=" +"time=" +"link=" +"cksum=" +"md5=" +"md5digest=" +"sha1=" +"sha1digest=" +"sha256=" +"sha256digest=" +"sha384=" +"sha384digest=" +"sha512=" +"sha512digest=" +"rmd160=" +"rmd160digest=" +"flags=" +"nlink=" +"inode=" +"device=" +"resdevice=" +"contents=" +"optional" +"ignore" +"nochange" diff --git a/contrib/oss-fuzz/libarchive_rar5_fuzzer.cc b/contrib/oss-fuzz/libarchive_rar5_fuzzer.cc new file mode 100644 index 000000000..4850879b3 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_rar5_fuzzer.cc @@ -0,0 +1,73 @@ +/* + * RAR5 format specific fuzzer for libarchive + * Targets RAR5 parsing code paths + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; // 512KB + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + // Enable RAR5 format specifically + archive_read_support_format_rar5(a); + // Enable common filters + archive_read_support_filter_all(a); + + // Set passphrase for encrypted archives + archive_read_add_passphrase(a, "password"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + // Exercise entry metadata 
access + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_is_encrypted(entry); + + // Read data + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_rar5_fuzzer.dict b/contrib/oss-fuzz/libarchive_rar5_fuzzer.dict new file mode 100644 index 000000000..f1e431157 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_rar5_fuzzer.dict @@ -0,0 +1,37 @@ +# RAR5 format dictionary +# Magic bytes (RAR5 signature) +"Rar!\x1a\x07\x01\x00" +"\x52\x61\x72\x21\x1a\x07\x01\x00" + +# Common header types +"\x01" +"\x02" +"\x03" +"\x04" +"\x05" + +# Common flags +"\x00\x00" +"\x01\x00" +"\x02\x00" +"\x04\x00" + +# Compression methods +"\x00" +"\x01" +"\x02" +"\x03" +"\x04" +"\x05" + +# File attributes +"\x20\x00\x00\x00" +"\x10\x00\x00\x00" + +# Encryption marker +"\x80" +"password" +"Password" + +# End of archive +"\x1d\x77\x56\x51\x03\x05\x04\x00" diff --git a/contrib/oss-fuzz/libarchive_rar_fuzzer.cc b/contrib/oss-fuzz/libarchive_rar_fuzzer.cc new file mode 100644 index 000000000..bf88a8b18 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_rar_fuzzer.cc @@ -0,0 +1,68 @@ +/* + * RAR v4 format fuzzer for libarchive + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) 
{ + return 0; + } + + archive_read_support_format_rar(a); + archive_read_support_filter_all(a); + + // Add passphrase for encrypted RARs + archive_read_add_passphrase(a, "password"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_mode(entry); + archive_entry_is_encrypted(entry); + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_read_disk_fuzzer.cc b/contrib/oss-fuzz/libarchive_read_disk_fuzzer.cc new file mode 100644 index 000000000..5d7fecf78 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_read_disk_fuzzer.cc @@ -0,0 +1,115 @@ +/* + * Archive read disk fuzzer for libarchive + * Tests filesystem traversal and entry creation from paths + * Security-critical: path traversal, symlink handling + */ +#include +#include +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 16 * 1024; + +class DataConsumer { +public: + DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) { + memset(string_buf_, 0, sizeof(string_buf_)); + } + + bool empty() const { return pos_ >= size_; } + + uint8_t consume_byte() { + if (pos_ >= size_) return 0; + return data_[pos_++]; + } + + const char* consume_string(size_t max_len) { + if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1; + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? 
avail : max_len; + + size_t actual_len = 0; + while (actual_len < len && pos_ < size_) { + char c = static_cast(data_[pos_++]); + if (c == '\0') break; + // Sanitize path characters for safety + if (c == '/' || c == '\\' || c == ':' || c == '\n' || c == '\r') { + c = '_'; + } + string_buf_[actual_len++] = c; + } + string_buf_[actual_len] = '\0'; + return string_buf_; + } + + size_t remaining() const { return size_ - pos_; } + +private: + const uint8_t *data_; + size_t size_; + size_t pos_; + char string_buf_[256]; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + DataConsumer consumer(buf, len); + + struct archive *a = archive_read_disk_new(); + if (a == NULL) { + return 0; + } + + // Configure disk reader behavior + uint8_t flags = consumer.consume_byte(); + if (flags & 0x01) { + archive_read_disk_set_symlink_logical(a); + } else if (flags & 0x02) { + archive_read_disk_set_symlink_physical(a); + } else { + archive_read_disk_set_symlink_hybrid(a); + } + + archive_read_disk_set_standard_lookup(a); + + // Set behavior flags + int behavior = 0; + if (flags & 0x04) behavior |= ARCHIVE_READDISK_RESTORE_ATIME; + if (flags & 0x08) behavior |= ARCHIVE_READDISK_HONOR_NODUMP; + if (flags & 0x10) behavior |= ARCHIVE_READDISK_NO_TRAVERSE_MOUNTS; + archive_read_disk_set_behavior(a, behavior); + + // Create an entry and test entry_from_file with various paths + struct archive_entry *entry = archive_entry_new(); + if (entry) { + // Test with /tmp (safe, always exists) + archive_entry_copy_pathname(entry, "/tmp"); + archive_read_disk_entry_from_file(a, entry, -1, NULL); + + // Get entry info + archive_entry_pathname(entry); + archive_entry_size(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + archive_entry_gid(entry); + + // Test name lookups + archive_read_disk_gname(a, 0); + archive_read_disk_uname(a, 0); + archive_read_disk_gname(a, 1000); + archive_read_disk_uname(a, 
1000); + + archive_entry_free(entry); + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.cc b/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.cc new file mode 100644 index 000000000..abe2b22af --- /dev/null +++ b/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.cc @@ -0,0 +1,162 @@ +/* + * Archive roundtrip fuzzer for libarchive + * Writes an archive then reads it back - tests write/read consistency + */ +#include +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 64 * 1024; + +class DataConsumer { +public: + DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) { + memset(string_buf_, 0, sizeof(string_buf_)); + } + + bool empty() const { return pos_ >= size_; } + + uint8_t consume_byte() { + if (pos_ >= size_) return 0; + return data_[pos_++]; + } + + uint32_t consume_uint32() { + uint32_t val = 0; + for (int i = 0; i < 4 && pos_ < size_; i++) { + val |= static_cast(data_[pos_++]) << (i * 8); + } + return val; + } + + const char* consume_string(size_t max_len) { + if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1; + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? avail : max_len; + + size_t actual_len = 0; + while (actual_len < len && pos_ < size_) { + char c = static_cast(data_[pos_++]); + if (c == '\0') break; + string_buf_[actual_len++] = c; + } + string_buf_[actual_len] = '\0'; + return string_buf_; + } + + const uint8_t* consume_bytes(size_t *out_len, size_t max_len) { + size_t avail = size_ - pos_; + size_t len = (avail < max_len) ? 
avail : max_len; + const uint8_t *ptr = data_ + pos_; + pos_ += len; + *out_len = len; + return ptr; + } + + size_t remaining() const { return size_ - pos_; } + +private: + const uint8_t *data_; + size_t size_; + size_t pos_; + char string_buf_[128]; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len < 10 || len > kMaxInputSize) { + return 0; + } + + DataConsumer consumer(buf, len); + std::vector archive_data; + archive_data.reserve(len * 2); + + // Phase 1: Write an archive + struct archive *writer = archive_write_new(); + if (writer == NULL) { + return 0; + } + + // Select format + uint8_t format = consumer.consume_byte() % 5; + switch (format) { + case 0: archive_write_set_format_pax_restricted(writer); break; + case 1: archive_write_set_format_ustar(writer); break; + case 2: archive_write_set_format_cpio_newc(writer); break; + case 3: archive_write_set_format_zip(writer); break; + default: archive_write_set_format_gnutar(writer); break; + } + + archive_write_add_filter_none(writer); + + // Open to memory + size_t used = 0; + archive_data.resize(len * 4); + if (archive_write_open_memory(writer, archive_data.data(), archive_data.size(), &used) != ARCHIVE_OK) { + archive_write_free(writer); + return 0; + } + + // Write entries + int entry_count = 0; + while (!consumer.empty() && entry_count < 5 && consumer.remaining() > 10) { + struct archive_entry *entry = archive_entry_new(); + if (entry == NULL) break; + + archive_entry_set_pathname(entry, consumer.consume_string(32)); + archive_entry_set_mode(entry, S_IFREG | 0644); + archive_entry_set_uid(entry, consumer.consume_uint32() & 0xFFFF); + archive_entry_set_gid(entry, consumer.consume_uint32() & 0xFFFF); + + size_t data_len; + const uint8_t *data = consumer.consume_bytes(&data_len, 256); + archive_entry_set_size(entry, data_len); + + if (archive_write_header(writer, entry) == ARCHIVE_OK && data_len > 0) { + archive_write_data(writer, data, data_len); + } + + 
archive_entry_free(entry); + entry_count++; + } + + archive_write_close(writer); + archive_write_free(writer); + + if (used == 0) { + return 0; + } + + // Phase 2: Read the archive back + struct archive *reader = archive_read_new(); + if (reader == NULL) { + return 0; + } + + archive_read_support_format_all(reader); + archive_read_support_filter_all(reader); + + if (archive_read_open_memory(reader, archive_data.data(), used) != ARCHIVE_OK) { + archive_read_free(reader); + return 0; + } + + std::vector read_buffer(4096, 0); + struct archive_entry *entry; + while (archive_read_next_header(reader, &entry) == ARCHIVE_OK) { + archive_entry_pathname(entry); + archive_entry_size(entry); + + ssize_t r; + while ((r = archive_read_data(reader, read_buffer.data(), read_buffer.size())) > 0) + ; + } + + archive_read_free(reader); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.options b/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.options new file mode 100644 index 000000000..1489609db --- /dev/null +++ b/contrib/oss-fuzz/libarchive_roundtrip_fuzzer.options @@ -0,0 +1,3 @@ +[libfuzzer] +max_len = 65536 +timeout = 30 diff --git a/contrib/oss-fuzz/libarchive_seek_fuzzer.cc b/contrib/oss-fuzz/libarchive_seek_fuzzer.cc new file mode 100644 index 000000000..148727c7d --- /dev/null +++ b/contrib/oss-fuzz/libarchive_seek_fuzzer.cc @@ -0,0 +1,124 @@ +/* + * Archive seek/read fuzzer for libarchive + * Tests seeking within archives and reading at random positions + */ +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 256 * 1024; + +struct SeekableBuffer { + const uint8_t *buf; + size_t len; + size_t pos; +}; + +static ssize_t seek_read_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + SeekableBuffer *buffer = reinterpret_cast(client_data); + if (buffer->pos >= buffer->len) { + *block = NULL; + return 0; + } + *block = buffer->buf + buffer->pos; + 
size_t avail = buffer->len - buffer->pos; + size_t to_read = (avail > 4096) ? 4096 : avail; + buffer->pos += to_read; + return to_read; +} + +static la_int64_t seek_callback(struct archive *a, void *client_data, + la_int64_t offset, int whence) { + (void)a; + SeekableBuffer *buffer = reinterpret_cast(client_data); + la_int64_t new_pos; + + switch (whence) { + case SEEK_SET: + new_pos = offset; + break; + case SEEK_CUR: + new_pos = static_cast(buffer->pos) + offset; + break; + case SEEK_END: + new_pos = static_cast(buffer->len) + offset; + break; + default: + return ARCHIVE_FATAL; + } + + if (new_pos < 0) new_pos = 0; + if (new_pos > static_cast(buffer->len)) + new_pos = static_cast(buffer->len); + + buffer->pos = static_cast(new_pos); + return new_pos; +} + +static la_int64_t skip_callback(struct archive *a, void *client_data, + la_int64_t request) { + (void)a; + SeekableBuffer *buffer = reinterpret_cast(client_data); + size_t avail = buffer->len - buffer->pos; + la_int64_t to_skip = (request > static_cast(avail)) + ? 
static_cast(avail) + : request; + buffer->pos += static_cast(to_skip); + return to_skip; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + // Enable formats that benefit from seeking + archive_read_support_format_zip_seekable(a); + archive_read_support_format_7zip(a); + archive_read_support_format_rar(a); + archive_read_support_format_rar5(a); + archive_read_support_format_iso9660(a); + archive_read_support_filter_all(a); + + SeekableBuffer buffer = {buf, len, 0}; + + archive_read_set_read_callback(a, seek_read_callback); + archive_read_set_seek_callback(a, seek_callback); + archive_read_set_skip_callback(a, skip_callback); + archive_read_set_callback_data(a, &buffer); + + if (archive_read_open1(a) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + int entry_count = 0; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK && entry_count < 50) { + archive_entry_pathname(entry); + archive_entry_size(entry); + + // Read data which may trigger seeks + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + + entry_count++; + } + + archive_read_free(a); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_seek_fuzzer.options b/contrib/oss-fuzz/libarchive_seek_fuzzer.options new file mode 100644 index 000000000..4821a7059 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_seek_fuzzer.options @@ -0,0 +1,3 @@ +[libfuzzer] +max_len = 262144 +timeout = 30 diff --git a/contrib/oss-fuzz/libarchive_string_fuzzer.cc b/contrib/oss-fuzz/libarchive_string_fuzzer.cc new file mode 100644 index 000000000..7fe3a99dc --- /dev/null +++ b/contrib/oss-fuzz/libarchive_string_fuzzer.cc @@ -0,0 +1,143 @@ +/* + * Archive string/encoding conversion fuzzer for libarchive + * Tests character encoding 
conversions which are often vulnerability sources + */ +#include +#include +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 32 * 1024; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive_entry *entry = archive_entry_new(); + if (entry == NULL) { + return 0; + } + + // Reserve some bytes for control + if (len < 4) { + archive_entry_free(entry); + return 0; + } + + uint8_t test_type = buf[0]; + const char *str = reinterpret_cast(buf + 1); + size_t str_len = len - 1; + + // Ensure null termination for string operations + char *safe_str = static_cast(malloc(str_len + 1)); + if (safe_str == NULL) { + archive_entry_free(entry); + return 0; + } + memcpy(safe_str, str, str_len); + safe_str[str_len] = '\0'; + + // Test various string functions based on type + switch (test_type % 10) { + case 0: + // Pathname conversions + archive_entry_set_pathname(entry, safe_str); + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_pathname_utf8(entry); + break; + + case 1: + // Symlink conversions + archive_entry_set_symlink(entry, safe_str); + archive_entry_symlink(entry); + archive_entry_symlink_w(entry); + archive_entry_symlink_utf8(entry); + break; + + case 2: + // Hardlink conversions + archive_entry_set_hardlink(entry, safe_str); + archive_entry_hardlink(entry); + archive_entry_hardlink_w(entry); + archive_entry_hardlink_utf8(entry); + break; + + case 3: + // Username conversions + archive_entry_set_uname(entry, safe_str); + archive_entry_uname(entry); + archive_entry_uname_w(entry); + archive_entry_uname_utf8(entry); + break; + + case 4: + // Group name conversions + archive_entry_set_gname(entry, safe_str); + archive_entry_gname(entry); + archive_entry_gname_w(entry); + archive_entry_gname_utf8(entry); + break; + + case 5: + // Copy functions + 
archive_entry_copy_pathname(entry, safe_str); + archive_entry_copy_symlink(entry, safe_str); + archive_entry_copy_hardlink(entry, safe_str); + break; + + case 6: + // UTF-8 specific + archive_entry_update_pathname_utf8(entry, safe_str); + archive_entry_update_symlink_utf8(entry, safe_str); + archive_entry_update_hardlink_utf8(entry, safe_str); + break; + + case 7: + // Fflags text + archive_entry_copy_fflags_text(entry, safe_str); + archive_entry_fflags_text(entry); + break; + + case 8: + // ACL text parsing + archive_entry_acl_from_text(entry, safe_str, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + archive_entry_acl_from_text(entry, safe_str, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); + archive_entry_acl_from_text(entry, safe_str, ARCHIVE_ENTRY_ACL_TYPE_NFS4); + break; + + case 9: { + // Wide character operations + size_t wlen = str_len; + wchar_t *wstr = static_cast(malloc((wlen + 1) * sizeof(wchar_t))); + if (wstr) { + mbstowcs(wstr, safe_str, wlen); + wstr[wlen] = L'\0'; + + archive_entry_copy_pathname_w(entry, wstr); + archive_entry_pathname_w(entry); + + archive_entry_copy_symlink_w(entry, wstr); + archive_entry_symlink_w(entry); + + free(wstr); + } + break; + } + } + + // Clone and compare + struct archive_entry *entry2 = archive_entry_clone(entry); + if (entry2) { + archive_entry_free(entry2); + } + + free(safe_str); + archive_entry_free(entry); + return 0; +} diff --git a/contrib/oss-fuzz/libarchive_tar_fuzzer.cc b/contrib/oss-fuzz/libarchive_tar_fuzzer.cc new file mode 100644 index 000000000..48e9d700b --- /dev/null +++ b/contrib/oss-fuzz/libarchive_tar_fuzzer.cc @@ -0,0 +1,100 @@ +/* + * TAR format fuzzer for libarchive + * Tests all TAR variants: ustar, pax, gnutar, v7, oldgnu + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + 
(void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_tar(a); + archive_read_support_format_gnutar(a); + archive_read_support_filter_all(a); + + // Enable various TAR options + archive_read_set_options(a, "tar:read_concatenated_archives,tar:mac-ext"); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector data_buffer(4096, 0); + struct archive_entry *entry; + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + // Exercise all metadata accessors + archive_entry_pathname(entry); + archive_entry_pathname_w(entry); + archive_entry_size(entry); + archive_entry_mtime(entry); + archive_entry_atime(entry); + archive_entry_ctime(entry); + archive_entry_mode(entry); + archive_entry_uid(entry); + archive_entry_gid(entry); + archive_entry_uname(entry); + archive_entry_gname(entry); + archive_entry_symlink(entry); + archive_entry_hardlink(entry); + archive_entry_rdev(entry); + archive_entry_devmajor(entry); + archive_entry_devminor(entry); + + // Test sparse file handling + archive_entry_sparse_reset(entry); + int64_t offset, length; + while (archive_entry_sparse_next(entry, &offset, &length) == ARCHIVE_OK) { + (void)offset; + (void)length; + } + + // Test xattr handling + archive_entry_xattr_reset(entry); + const char *name; + const void *value; + size_t size; + while (archive_entry_xattr_next(entry, &name, &value, &size) == ARCHIVE_OK) { + (void)name; + (void)value; + (void)size; + } + + ssize_t r; + while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0) + ; + } + + archive_read_free(a); + return 
0; +} diff --git a/contrib/oss-fuzz/libarchive_tar_fuzzer.dict b/contrib/oss-fuzz/libarchive_tar_fuzzer.dict new file mode 100644 index 000000000..954d54b59 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_tar_fuzzer.dict @@ -0,0 +1,51 @@ +# TAR format dictionary + +# USTAR magic +"ustar" +"ustar\x00" +"ustar \x00" + +# GNU tar magic +"GNUtar " +"GNUtar\x00" + +# Common header field values +"00000000000" +"0000644" +"0000755" +"0000777" + +# Type flags +"0" +"1" +"2" +"3" +"4" +"5" +"6" +"7" +"g" +"x" +"L" +"K" + +# PAX keywords +"path=" +"linkpath=" +"uname=" +"gname=" +"uid=" +"gid=" +"size=" +"mtime=" +"atime=" +"ctime=" +"SCHILY.xattr." +"LIBARCHIVE.xattr." + +# Sparse headers +"GNU.sparse.major=" +"GNU.sparse.minor=" +"GNU.sparse.name=" +"GNU.sparse.realsize=" +"GNU.sparse.map=" diff --git a/contrib/oss-fuzz/libarchive_warc_fuzzer.cc b/contrib/oss-fuzz/libarchive_warc_fuzzer.cc new file mode 100644 index 000000000..f046bab48 --- /dev/null +++ b/contrib/oss-fuzz/libarchive_warc_fuzzer.cc @@ -0,0 +1,62 @@ +/* + * WARC (Web Archive) format fuzzer for libarchive + */ +#include +#include +#include + +#include "archive.h" +#include "archive_entry.h" + +static constexpr size_t kMaxInputSize = 512 * 1024; + +struct Buffer { + const uint8_t *buf; + size_t len; +}; + +static ssize_t reader_callback(struct archive *a, void *client_data, + const void **block) { + (void)a; + Buffer *buffer = reinterpret_cast(client_data); + *block = buffer->buf; + ssize_t len = buffer->len; + buffer->len = 0; + return len; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) { + if (len == 0 || len > kMaxInputSize) { + return 0; + } + + struct archive *a = archive_read_new(); + if (a == NULL) { + return 0; + } + + archive_read_support_format_warc(a); + archive_read_support_filter_all(a); + + Buffer buffer = {buf, len}; + if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) { + archive_read_free(a); + return 0; + } + + std::vector 
data_buffer(4096, 0);
+  struct archive_entry *entry;
+
+  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+    archive_entry_pathname(entry);
+    archive_entry_size(entry);
+    archive_entry_mtime(entry);
+
+    ssize_t r;
+    while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0)
+      ;
+  }
+
+  archive_read_free(a);
+  return 0;
+}
diff --git a/contrib/oss-fuzz/libarchive_warc_fuzzer.dict b/contrib/oss-fuzz/libarchive_warc_fuzzer.dict
new file mode 100644
index 000000000..ca1d08adf
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_warc_fuzzer.dict
@@ -0,0 +1,34 @@
+# WARC format dictionary
+
+# Version
+"WARC/1.0"
+"WARC/1.1"
+"WARC/0.17"
+"WARC/0.18"
+
+# Record types
+"warcinfo"
+"response"
+"resource"
+"request"
+"metadata"
+"revisit"
+"conversion"
+"continuation"
+
+# Headers
+"WARC-Type:"
+"WARC-Record-ID:"
+"WARC-Date:"
+"WARC-Target-URI:"
+"Content-Length:"
+"Content-Type:"
+"WARC-Block-Digest:"
+"WARC-Payload-Digest:"
+"WARC-Concurrent-To:"
+"WARC-Refers-To:"
+
+# Content types
+"application/warc-fields"
+"application/http;msgtype=request"
+"application/http;msgtype=response"
diff --git a/contrib/oss-fuzz/libarchive_write_disk_fuzzer.cc b/contrib/oss-fuzz/libarchive_write_disk_fuzzer.cc
new file mode 100644
index 000000000..1d54c7510
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_write_disk_fuzzer.cc
@@ -0,0 +1,225 @@
+/*
+ * Archive write disk fuzzer for libarchive
+ * Tests extraction to filesystem
+ * Security-critical: path traversal, permission handling, symlink attacks
+ */
+#include <ftw.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "archive.h"
+#include "archive_entry.h"
+
+static constexpr size_t kMaxInputSize = 64 * 1024;
+
+// Scratch directory that entries are extracted into; an empty string means
+// it could not be created and every input is skipped.
+static char g_temp_dir[256] = {0};
+
+// Cursor over the fuzz input that hands out integers, strings and byte runs.
+// Every consume_* call tolerates running out of input.
+class DataConsumer {
+public:
+  DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) {
+    memset(string_buf_, 0, sizeof(string_buf_));
+  }
+
+  bool empty() const { return pos_ >= size_; }
+
+  // Next byte, or 0 once the input is exhausted.
+  uint8_t consume_byte() {
+    if (pos_ >= size_) return 0;
+    return data_[pos_++];
+  }
+
+  // Little-endian 32-bit value built from up to 4 remaining bytes.
+  uint32_t consume_uint32() {
+    uint32_t val = 0;
+    for (int i = 0; i < 4 && pos_ < size_; i++) {
+      val |= static_cast<uint32_t>(data_[pos_++]) << (i * 8);
+    }
+    return val;
+  }
+
+  // Little-endian 64-bit value built from up to 8 remaining bytes.
+  int64_t consume_int64() {
+    // Assemble in an unsigned type so shifting into the top bit is well defined.
+    uint64_t val = 0;
+    for (int i = 0; i < 8 && pos_ < size_; i++) {
+      val |= static_cast<uint64_t>(data_[pos_++]) << (i * 8);
+    }
+    return static_cast<int64_t>(val);
+  }
+
+  // Copies up to max_len bytes (capped to the internal buffer) into a
+  // NUL-terminated string, stopping early at a NUL byte in the input.
+  // The returned pointer is only valid until the next consume_path() call.
+  const char* consume_path(size_t max_len) {
+    if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1;
+    size_t avail = size_ - pos_;
+    size_t len = (avail < max_len) ? avail : max_len;
+
+    size_t actual_len = 0;
+    while (actual_len < len && pos_ < size_) {
+      char c = static_cast<char>(data_[pos_++]);
+      if (c == '\0') break;
+      string_buf_[actual_len++] = c;
+    }
+    string_buf_[actual_len] = '\0';
+    return string_buf_;
+  }
+
+  // Hands out up to max_len raw bytes without copying; *out_len receives
+  // the number actually available.
+  const uint8_t* consume_bytes(size_t *out_len, size_t max_len) {
+    size_t avail = size_ - pos_;
+    size_t len = (avail < max_len) ? avail : max_len;
+    const uint8_t *ptr = data_ + pos_;
+    pos_ += len;
+    *out_len = len;
+    return ptr;
+  }
+
+  size_t remaining() const { return size_ - pos_; }
+
+private:
+  const uint8_t *data_;
+  size_t size_;
+  size_t pos_;
+  char string_buf_[256];
+};
+
+// nftw() callback: delete one file or (already emptied) directory.
+static int remove_entry(const char *path, const struct stat *sb, int typeflag,
+                        struct FTW *ftwbuf) {
+  (void)sb;
+  (void)typeflag;
+  (void)ftwbuf;
+  (void)remove(path);
+  return 0;  // keep walking even if one removal fails
+}
+
+// Return the scratch directory to a known-empty state between inputs.
+// Done in-process with nftw(): names starting with '.' are removed too (a
+// shell glob would miss them) and no shell is spawned per input.
+static void reset_temp_dir() {
+  // FTW_DEPTH visits children before their directory; FTW_PHYS keeps the
+  // walk from following symlinks out of the scratch directory.
+  (void)nftw(g_temp_dir, remove_entry, 16, FTW_DEPTH | FTW_PHYS);
+  // The walk removes the directory itself as well, so recreate it.
+  (void)mkdir(g_temp_dir, 0700);
+}
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  (void)argc;
+  (void)argv;
+  // Create a temporary directory for extraction
+  snprintf(g_temp_dir, sizeof(g_temp_dir), "/tmp/fuzz_extract_XXXXXX");
+  if (mkdtemp(g_temp_dir) == NULL) {
+    g_temp_dir[0] = '\0';
+  }
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) {
+  if (len == 0 || len > kMaxInputSize) {
+    return 0;
+  }
+
+  if (g_temp_dir[0] == '\0') {
+    return 0;
+  }
+
+  DataConsumer consumer(buf, len);
+
+  struct archive *disk = archive_write_disk_new();
+  if (disk == NULL) {
+    return 0;
+  }
+
+  // Configure write disk options: one input byte, one extraction flag per bit
+  uint8_t opt_flags = consumer.consume_byte();
+  int flags = 0;
+  if (opt_flags & 0x01) flags |= ARCHIVE_EXTRACT_TIME;
+  if (opt_flags & 0x02) flags |= ARCHIVE_EXTRACT_PERM;
+  if (opt_flags & 0x04) flags |= ARCHIVE_EXTRACT_ACL;
+  if (opt_flags & 0x08) flags |= ARCHIVE_EXTRACT_FFLAGS;
+  if (opt_flags & 0x10) flags |= ARCHIVE_EXTRACT_OWNER;
+  if (opt_flags & 0x20) flags |= ARCHIVE_EXTRACT_XATTR;
+  if (opt_flags & 0x40) flags |= ARCHIVE_EXTRACT_SECURE_SYMLINKS;
+  if (opt_flags & 0x80) flags |= ARCHIVE_EXTRACT_SECURE_NODOTDOT;
+
+  archive_write_disk_set_options(disk, flags);
+  archive_write_disk_set_standard_lookup(disk);
+
+  // Create entries to extract
+  int entry_count = 0;
+  while (!consumer.empty() && entry_count < 5 && consumer.remaining() > 20) {
+    struct archive_entry *entry = archive_entry_new();
+    if (entry == NULL) break;
+
+    // Build a safe path within our temp directory
+    char safe_path[512];
+    const char *name = consumer.consume_path(32);
+    snprintf(safe_path, sizeof(safe_path), "%s/%s", g_temp_dir, name);
+
+    // Sanitize path to prevent traversal
+    char *p = safe_path;
+    while (*p) {
+      if (p[0] == '.' && p[1] == '.') {
+        p[0] = '_';
+        p[1] = '_';
+      }
+      p++;
+    }
+
+    archive_entry_set_pathname(entry, safe_path);
+
+    uint8_t ftype = consumer.consume_byte() % 3;
+    mode_t mode;
+    switch (ftype) {
+      case 0: mode = S_IFREG | 0644; break;
+      case 1: mode = S_IFDIR | 0755; break;
+      default: mode = S_IFREG | 0644; break;
+    }
+    archive_entry_set_mode(entry, mode);
+
+    archive_entry_set_uid(entry, 1000);
+    archive_entry_set_gid(entry, 1000);
+    archive_entry_set_mtime(entry, consumer.consume_int64(), 0);
+
+    // Regular files carry a payload. Take it, and declare its size, before
+    // the header is written: setting the size afterwards is too late for
+    // it to be seen by archive_write_header().
+    size_t data_len = 0;
+    const uint8_t *data = NULL;
+    if (S_ISREG(mode)) {
+      data = consumer.consume_bytes(&data_len, 256);
+      archive_entry_set_size(entry, data_len);
+    }
+
+    // Write the entry header
+    if (archive_write_header(disk, entry) == ARCHIVE_OK) {
+      if (data_len > 0) {
+        archive_write_data(disk, data, data_len);
+      }
+      archive_write_finish_entry(disk);
+    }
+
+    archive_entry_free(entry);
+    entry_count++;
+  }
+
+  archive_write_close(disk);
+  archive_write_free(disk);
+
+  // Clean up extracted files
+  reset_temp_dir();
+
+  return 0;
+}
diff --git a/contrib/oss-fuzz/libarchive_write_disk_fuzzer.options b/contrib/oss-fuzz/libarchive_write_disk_fuzzer.options
new file mode 100644
index 000000000..1489609db
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_write_disk_fuzzer.options
@@ -0,0 +1,3 @@
+[libfuzzer]
+max_len = 65536
+timeout = 30
diff --git a/contrib/oss-fuzz/libarchive_write_fuzzer.cc b/contrib/oss-fuzz/libarchive_write_fuzzer.cc
new file mode 100644
index 000000000..8612a6e9d
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_write_fuzzer.cc
@@ -0,0 +1,190 @@
+/*
+ * Archive write fuzzer for libarchive
+ * Tests archive creation and writing code paths
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <vector>
+
+#include "archive.h"
+#include "archive_entry.h"
+
+static constexpr size_t kMaxInputSize = 64 * 1024; // 64KB
+
+// Simple data consumer: a cursor over the fuzz input that tolerates running out of bytes
+class DataConsumer {
+public:
+  DataConsumer(const uint8_t *data, size_t size) : data_(data), size_(size), pos_(0) {
+    memset(string_buf_, 0, sizeof(string_buf_));
+  }
+
+  bool empty() const { return pos_ >= size_; }
+
+  uint8_t consume_byte() {
+    if (pos_ >= size_) return 0;
+    return data_[pos_++];
+  }
+
+  uint32_t consume_uint32() {
+    uint32_t val = 0;
+    for (int i = 0; i < 4 && pos_ < size_; i++) {
+      val |= static_cast<uint32_t>(data_[pos_++]) << (i * 8);
+    }
+    return val;
+  }
+
+  int64_t consume_int64() {
+    uint64_t val = 0;  // unsigned so that shifting into the top bit is well defined
+    for (int i = 0; i < 8 && pos_ < size_; i++) {
+      val |= static_cast<uint64_t>(data_[pos_++]) << (i * 8);
+    }
+    return static_cast<int64_t>(val);
+  }
+
+  const char* consume_string(size_t max_len) {
+    if (max_len > sizeof(string_buf_) - 1) max_len = sizeof(string_buf_) - 1;
+    size_t avail = size_ - pos_;
+    size_t len = (avail < max_len) ? avail : max_len;
+
+    size_t actual_len = 0;
+    while (actual_len < len && pos_ < size_) {
+      char c = static_cast<char>(data_[pos_++]);
+      if (c == '\0') break;
+      string_buf_[actual_len++] = c;
+    }
+    string_buf_[actual_len] = '\0';
+    return string_buf_;
+  }
+
+  const uint8_t* consume_bytes(size_t *out_len, size_t max_len) {
+    size_t avail = size_ - pos_;
+    size_t len = (avail < max_len) ? avail : max_len;
+    const uint8_t *ptr = data_ + pos_;
+    pos_ += len;
+    *out_len = len;
+    return ptr;
+  }
+
+  size_t remaining() const { return size_ - pos_; }
+
+private:
+  const uint8_t *data_;
+  size_t size_;
+  size_t pos_;
+  char string_buf_[256];
+};
+
+// Memory write callback: appends each block to the OutputBuffer passed as client_data
+using OutputBuffer = std::vector<uint8_t>;
+
+static ssize_t write_callback(struct archive *a, void *client_data, const void *buffer, size_t length) {
+  (void)a;
+  OutputBuffer *output = static_cast<OutputBuffer *>(client_data);
+  if (output && length > 0) {
+    const uint8_t *buf = static_cast<const uint8_t *>(buffer);
+    output->insert(output->end(), buf, buf + length);
+  }
+  return length;
+}
+
+static int close_callback(struct archive *a, void *client_data) {
+  (void)a;
+  (void)client_data;
+  return ARCHIVE_OK;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) {
+  if (len == 0 || len > kMaxInputSize) {
+    return 0;
+  }
+
+  DataConsumer consumer(buf, len);
+  // Collects the archive bytes; handed to write_callback as client_data
+  OutputBuffer output;
+
+  struct archive *a = archive_write_new();
+  if (a == NULL) {
+    return 0;
+  }
+
+  // Select format based on input
+  uint8_t format_choice = consumer.consume_byte() % 8;
+  switch (format_choice) {
+    case 0: archive_write_set_format_pax_restricted(a); break;
+    case 1: archive_write_set_format_gnutar(a); break;
+    case 2: archive_write_set_format_ustar(a); break;
+    case 3: archive_write_set_format_cpio_newc(a); break;
+    case 4: archive_write_set_format_zip(a); break;
+    case 5: archive_write_set_format_7zip(a); break;
+    case 6: archive_write_set_format_xar(a); break;
+    default: archive_write_set_format_pax(a); break;
+  }
+
+  // Select compression based on input
+  uint8_t filter_choice = consumer.consume_byte() % 6;
+  switch (filter_choice) {
+    case 0: archive_write_add_filter_gzip(a); break;
+    case 1: archive_write_add_filter_bzip2(a); break;
+    case 2: archive_write_add_filter_xz(a); break;
+    case 3: archive_write_add_filter_zstd(a); break;
+    case 4: archive_write_add_filter_none(a); break;
+    default: archive_write_add_filter_none(a); break;
+  }
+
+  // Open for writing to memory
+  if (archive_write_open(a, &output, NULL, write_callback, close_callback) != ARCHIVE_OK) {
+    archive_write_free(a);
+    return 0;
+  }
+
+  // Create entries based on remaining input
+  int entry_count = 0;
+  while (!consumer.empty() && entry_count < 10 && consumer.remaining() > 20) {
+    struct archive_entry *entry = archive_entry_new();
+    if (entry == NULL) break;
+
+    // Set entry properties
+    archive_entry_set_pathname(entry, consumer.consume_string(64));
+
+    uint8_t ftype = consumer.consume_byte() % 4;
+    mode_t mode;
+    switch (ftype) {
+      case 0: mode = S_IFREG | 0644; break;
+      case 1: mode = S_IFDIR | 0755; break;
+      case 2: mode = S_IFLNK | 0777; break;
+      default: mode = S_IFREG | 0644; break;
+    }
+    archive_entry_set_mode(entry, mode);
+
+    archive_entry_set_uid(entry, consumer.consume_uint32() & 0xFFFF);
+    archive_entry_set_gid(entry, consumer.consume_uint32() & 0xFFFF);
+    archive_entry_set_mtime(entry, consumer.consume_int64(), 0);
+
+    // For regular files, write some data
+    if (S_ISREG(mode)) {
+      size_t data_len;
+      const uint8_t *data = consumer.consume_bytes(&data_len, 1024);
+      archive_entry_set_size(entry, data_len);
+
+      if (archive_write_header(a, entry) == ARCHIVE_OK && data_len > 0) {
+        archive_write_data(a, data, data_len);
+      }
+    } else if (S_ISLNK(mode)) {
+      archive_entry_set_symlink(entry, consumer.consume_string(64));
+      archive_entry_set_size(entry, 0);
+      archive_write_header(a, entry);
+    } else {
+      archive_entry_set_size(entry, 0);
+      archive_write_header(a, entry);
+    }
+
+    archive_entry_free(entry);
+    entry_count++;
+  }
+
+  archive_write_close(a);
+  archive_write_free(a);
+  return 0;
+}
diff --git a/contrib/oss-fuzz/libarchive_xar_fuzzer.cc b/contrib/oss-fuzz/libarchive_xar_fuzzer.cc
new file mode 100644
index 000000000..8f787432a
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_xar_fuzzer.cc
@@ -0,0 +1,72 @@
+/*
+ * XAR format specific fuzzer for libarchive
+ * Targets xar_read_header and XAR parsing code paths
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <vector>
+
+#include "archive.h"
+#include "archive_entry.h"
+
+static constexpr size_t kMaxInputSize = 512 * 1024; // 512KB
+
+struct Buffer {
+  const uint8_t *buf;
+  size_t len;
+};
+
+static ssize_t reader_callback(struct archive *a, void *client_data,
+                               const void **block) {
+  (void)a;
+  Buffer *buffer = reinterpret_cast<Buffer *>(client_data);
+  *block = buffer->buf;
+  ssize_t len = buffer->len;
+  buffer->len = 0;  // the whole input is handed over once; the next call reports 0 bytes
+  return len;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) {
+  if (len == 0 || len > kMaxInputSize) {
+    return 0;
+  }
+
+  struct archive *a = archive_read_new();
+  if (a == NULL) {
+    return 0;
+  }
+
+  // Enable XAR format specifically
+  archive_read_support_format_xar(a);
+  // Enable common filters
+  archive_read_support_filter_all(a);
+
+  Buffer buffer = {buf, len};
+  if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) {
+    archive_read_free(a);
+    return 0;
+  }
+
+  std::vector<uint8_t> data_buffer(4096, 0);
+  struct archive_entry *entry;
+
+  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+    // Exercise entry metadata access
+    archive_entry_pathname(entry);
+    archive_entry_pathname_w(entry);
+    archive_entry_size(entry);
+    archive_entry_mtime(entry);
+    archive_entry_mode(entry);
+    archive_entry_filetype(entry);
+    archive_entry_uid(entry);
+    archive_entry_gid(entry);
+
+    // Read data
+    ssize_t r;
+    while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0)
+      ;
+  }
+
+  archive_read_free(a);
+  return 0;
+}
diff --git
a/contrib/oss-fuzz/libarchive_xar_fuzzer.dict b/contrib/oss-fuzz/libarchive_xar_fuzzer.dict
new file mode 100644
index 000000000..1e5d93544
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_xar_fuzzer.dict
@@ -0,0 +1,44 @@
+# XAR format dictionary
+# Magic bytes
+"xar!"
+"\x78\x61\x72\x21"
+
+# XML elements of the XAR table of contents
+"<xar>"
+"</xar>"
+"<toc>"
+"</toc>"
+"<file>"
+"</file>"
+"<name>"
+"<type>"
+"<data>"
+"</data>"
+"<length>"
+"<offset>"
+"<size>"
+"<encoding>"
+"<archived-checksum>"
+"<extracted-checksum>"
+"<checksum>"
+"<creation-time>"
+"<mode>"
+"<uid>"
+"<gid>"
+"<user>"
+"<group>"
+"<mtime>"
+"<link>"
+"<ea>"
+
+# Compression types
+"application/octet-stream"
+"application/x-gzip"
+"application/x-bzip2"
+"application/x-lzma"
+
+# Checksum types
+"sha1"
+"md5"
+"sha256"
+"sha512"
diff --git a/contrib/oss-fuzz/libarchive_xar_fuzzer.options b/contrib/oss-fuzz/libarchive_xar_fuzzer.options
new file mode 100644
index 000000000..d2d9f0ed2
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_xar_fuzzer.options
@@ -0,0 +1,10 @@
+[libfuzzer]
+max_len = 524288
+timeout = 60
+rss_limit_mb = 2048
+
+[honggfuzz]
+timeout = 60
+
+[afl]
+timeout = 60
diff --git a/contrib/oss-fuzz/libarchive_zip_fuzzer.cc b/contrib/oss-fuzz/libarchive_zip_fuzzer.cc
new file mode 100644
index 000000000..57331f4b7
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_zip_fuzzer.cc
@@ -0,0 +1,80 @@
+/*
+ * ZIP format fuzzer for libarchive
+ * Tests ZIP with various compression methods and encryption
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <vector>
+
+#include "archive.h"
+#include "archive_entry.h"
+
+static constexpr size_t kMaxInputSize = 512 * 1024;
+
+struct Buffer {
+  const uint8_t *buf;
+  size_t len;
+};
+
+static ssize_t reader_callback(struct archive *a, void *client_data,
+                               const void **block) {
+  (void)a;
+  Buffer *buffer = reinterpret_cast<Buffer *>(client_data);
+  *block = buffer->buf;
+  ssize_t len = buffer->len;
+  buffer->len = 0;  // the whole input is handed over once; the next call reports 0 bytes
+  return len;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) {
+  if (len == 0 || len > kMaxInputSize) {
+    return 0;
+  }
+
+  struct archive *a = archive_read_new();
+  if (a == NULL) {
+    return 0;
+  }
+
+  archive_read_support_format_zip(a);
+  archive_read_support_filter_all(a);
+
+  // Register candidate passphrases so encrypted ZIP entries can be attempted
+  archive_read_add_passphrase(a, "password");
+  archive_read_add_passphrase(a, "test");
+  archive_read_add_passphrase(a, "");
+
+  // Enable ZIP options: pass the reader's "ignorecrc32" option
+  archive_read_set_options(a, "zip:ignorecrc32");
+
+  Buffer buffer = {buf, len};
+  if (archive_read_open(a, &buffer, NULL, reader_callback, NULL) != ARCHIVE_OK) {
+    archive_read_free(a);
+    return 0;
+  }
+
+  std::vector<uint8_t> data_buffer(4096, 0);
+  struct archive_entry *entry;
+
+  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+    archive_entry_pathname(entry);
+    archive_entry_pathname_w(entry);
+    archive_entry_size(entry);
+    archive_entry_mtime(entry);
+    archive_entry_mode(entry);
+    archive_entry_is_encrypted(entry);
+    archive_entry_is_data_encrypted(entry);
+    archive_entry_is_metadata_encrypted(entry);
+
+    // Query the format name and the name of filter 0 for this entry
+    archive_format_name(a);
+    archive_filter_name(a, 0);
+
+    ssize_t r;
+    while ((r = archive_read_data(a, data_buffer.data(), data_buffer.size())) > 0)
+      ;
+  }
+
+  archive_read_free(a);
+  return 0;
+}
diff --git a/contrib/oss-fuzz/libarchive_zip_fuzzer.dict b/contrib/oss-fuzz/libarchive_zip_fuzzer.dict
new file mode 100644
index 000000000..185c6a3fa
--- /dev/null
+++ b/contrib/oss-fuzz/libarchive_zip_fuzzer.dict
@@ -0,0 +1,43 @@
+# ZIP format dictionary
+
+# Signatures
+"PK\x03\x04"
+"PK\x01\x02"
+"PK\x05\x06"
+"PK\x06\x06"
+"PK\x06\x07"
+"PK\x07\x08"
+
+# Version needed
+"\x14\x00"
+"\x0a\x00"
+"\x2d\x00"
+"\x3f\x00"
+
+# Compression methods
+"\x00\x00"
+"\x08\x00"
+"\x09\x00"
+"\x0c\x00"
+"\x0e\x00"
+"\x5f\x00"
+
+# General purpose flags
+"\x00\x00"
+"\x01\x00"
+"\x08\x00"
+"\x09\x00"
+
+# Extra field IDs
+"\x01\x00"
+"\x07\x00"
+"\x09\x00"
+"\x0a\x00"
+"\x15\x00"
+"\x17\x00"
+"\x55\x54"
+"\x75\x78"
+
+# Encryption
+"\x01\x99"
+"\x02\x99"
diff --git a/contrib/oss-fuzz/oss-fuzz-build.sh b/contrib/oss-fuzz/oss-fuzz-build.sh
index 83d8470b1..16850fe38 100755
--- a/contrib/oss-fuzz/oss-fuzz-build.sh
+++
b/contrib/oss-fuzz/oss-fuzz-build.sh
@@ -1,16 +1,136 @@
-# build the project
+#!/bin/bash -eu
+
+# Build the project
 ./build/autogen.sh
 ./configure
 make -j$(nproc) all
 
-# build seed
-cp $SRC/libarchive/contrib/oss-fuzz/corpus.zip\
-        $OUT/libarchive_fuzzer_seed_corpus.zip
-
-# build fuzzer(s)
-$CXX $CXXFLAGS -Ilibarchive \
-    $SRC/libarchive/contrib/oss-fuzz/libarchive_fuzzer.cc \
-    -o $OUT/libarchive_fuzzer $LIB_FUZZING_ENGINE \
-    .libs/libarchive.a -Wl,-Bstatic -lbz2 -llzo2 \
-    -lxml2 -llzma -lz -lcrypto -llz4 -licuuc \
-    -licudata -Wl,-Bdynamic
+FUZZ_DIR=$SRC/libarchive/contrib/oss-fuzz
+TEST_DIR=$SRC/libarchive/libarchive/test
+
+# Common libraries for linking
+LIBS=".libs/libarchive.a -Wl,-Bstatic -lbz2 -llzo2 -lxml2 -llzma -lz -lcrypto -llz4 -licuuc -licudata -Wl,-Bdynamic"
+
+# Build one fuzz target.
+#   $1 - target name, also the name of the binary written to $OUT
+#   $2 - path of the .cc source file
+build_fuzzer() {
+  local name=$1
+  local source=$2
+  echo "Building fuzzer: $name"
+  # $CXXFLAGS, $LIB_FUZZING_ENGINE and $LIBS are left unquoted on purpose:
+  # each one expands to several separate arguments.
+  $CXX $CXXFLAGS -Ilibarchive \
+    "$source" \
+    -o "$OUT/$name" $LIB_FUZZING_ENGINE $LIBS
+}
+
+# Every fuzz target; each is built from $FUZZ_DIR/<name>.cc
+FUZZERS=(
+  "libarchive_fuzzer"
+  "libarchive_tar_fuzzer"
+  "libarchive_zip_fuzzer"
+  "libarchive_7zip_fuzzer"
+  "libarchive_rar_fuzzer"
+  "libarchive_rar5_fuzzer"
+  "libarchive_xar_fuzzer"
+  "libarchive_cab_fuzzer"
+  "libarchive_lha_fuzzer"
+  "libarchive_iso9660_fuzzer"
+  "libarchive_cpio_fuzzer"
+  "libarchive_warc_fuzzer"
+  "libarchive_mtree_fuzzer"
+  "libarchive_ar_fuzzer"
+  "libarchive_filter_fuzzer"
+  "libarchive_entry_fuzzer"
+  "libarchive_write_fuzzer"
+  "libarchive_linkify_fuzzer"
+  "libarchive_match_fuzzer"
+  "libarchive_encryption_fuzzer"
+  "libarchive_read_disk_fuzzer"
+  "libarchive_write_disk_fuzzer"
+  "libarchive_seek_fuzzer"
+  "libarchive_string_fuzzer"
+  "libarchive_roundtrip_fuzzer"
+)
+
+built=0
+for fuzzer in "${FUZZERS[@]}"; do
+  if [ -f "$FUZZ_DIR/${fuzzer}.cc" ]; then
+    build_fuzzer "$fuzzer" "$FUZZ_DIR/${fuzzer}.cc"
+    built=$((built + 1))
+  else
+    # A listed target without a source file is skipped, but not silently:
+    # otherwise the final summary would overstate what was built.
+    echo "WARNING: $FUZZ_DIR/${fuzzer}.cc not found, skipping $fuzzer" >&2
+  fi
+done
+
+# Copy dictionaries and options
+cp "$FUZZ_DIR"/*.dict "$OUT/" 2>/dev/null || true
+cp "$FUZZ_DIR"/*.options "$OUT/" 2>/dev/null || true
+
+# Build seed corpora
+echo "Building seed corpora..."
+
+# Main fuzzer corpus (existing)
+cp "$FUZZ_DIR/corpus.zip" "$OUT/libarchive_fuzzer_seed_corpus.zip"
+
+# Seed files are stored uuencoded; without uudecode no corpus can be produced.
+if ! command -v uudecode >/dev/null 2>&1; then
+  echo "WARNING: uudecode not found; seed corpora will not be generated" >&2
+fi
+
+# Create a seed corpus zip for one fuzz target from uuencoded test files.
+#   $1   - target name; output is $OUT/<name>_seed_corpus.zip
+#   $2.. - one or more glob patterns relative to $TEST_DIR
+# Best-effort: files that fail to decode are skipped, and no zip is written
+# when nothing was decoded.
+create_corpus() {
+  local name=$1
+  shift
+  local dir pattern f base count
+  # Private scratch directory rather than a fixed, predictable /tmp path
+  dir=$(mktemp -d)
+
+  for pattern in "$@"; do
+    # $pattern stays unquoted so that the glob expands
+    for f in "$TEST_DIR"/$pattern; do
+      if [ -f "$f" ]; then
+        base=$(basename "$f" .uu)
+        uudecode -o "$dir/$base" "$f" 2>/dev/null || true
+      fi
+    done
+  done
+
+  count=$(ls -A "$dir" | wc -l)
+  if [ "$count" -gt 0 ]; then
+    zip -j "$OUT/${name}_seed_corpus.zip" "$dir"/* 2>/dev/null || true
+    echo "Created corpus for $name with $count files"
+  fi
+  rm -rf "$dir"
+}
+
+# Create format-specific corpora
+create_corpus "libarchive_tar_fuzzer" "test_compat_*tar*.uu"
+create_corpus "libarchive_zip_fuzzer" "test_*zip*.uu"
+create_corpus "libarchive_7zip_fuzzer" "test_read_format_7zip*.uu"
+create_corpus "libarchive_rar_fuzzer" "test_read_format_rar_*.uu"
+create_corpus "libarchive_rar5_fuzzer" "test_read_format_rar5*.uu"
+create_corpus "libarchive_xar_fuzzer" "test_read_format_xar*.uu"
+create_corpus "libarchive_cab_fuzzer" "test_read_format_cab*.uu"
+create_corpus "libarchive_lha_fuzzer" "test_read_format_lha*.uu"
+create_corpus "libarchive_iso9660_fuzzer" "test_read_format_iso*.uu"
+create_corpus "libarchive_cpio_fuzzer" "test_compat_cpio*.uu"
+create_corpus "libarchive_warc_fuzzer" "test_read_format_warc*.uu"
+create_corpus "libarchive_mtree_fuzzer" "test_read_format_mtree*.uu"
+create_corpus "libarchive_ar_fuzzer" "test_read_format_ar*.uu"
+
+# Filter corpus - use compressed test files
+create_corpus "libarchive_filter_fuzzer" \
+  "*.gz.uu" "*.bz2.uu" "*.xz.uu" "*.lz4.uu" "*.zst.uu" "*.Z.uu"
+
+# Encryption corpus - encrypted archives
+create_corpus "libarchive_encryption_fuzzer" "*encrypt*.uu" "*password*.uu"
+
+echo "Build complete! Built $built of ${#FUZZERS[@]} fuzzers."