From: Joel Rosdahl
Date: Thu, 4 Nov 2021 07:20:33 +0000 (+0100)
Subject: feat: Improve cache entry format
X-Git-Tag: v4.5~11
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e83ac28d74dcb60af9ceaa58d92e706ded089ea3;p=thirdparty%2Fccache.git

feat: Improve cache entry format

Cache entry header changes:

* Changed magic bytes to represent a generic “ccache entry” instead of a
  specific result/manifest entry.
* Added a payload type field (currently manifest or result) separate from the
  magic bytes. This allows code that only operates on the “container” (e.g.
  recompression) to be ignorant of payload types and changes to payload
  formats.
* Added a creation timestamp field. This can be useful when debugging, for
  instance to match the timestamp with a build start/end.
* Added a field for the ccache version that created the cache entry. This can
  be useful for debugging, and it also makes it possible for future ccache
  versions to refuse using results from previous buggy ccache versions.
* Added a tag field to be used by a future feature.

Cache entry epilogue changes:

* Use 128-bit XXH3 instead of 64-bit XXH3 for the checksum since those extra
  bits are computed by the algorithm anyway.

Manifest/result entry changes:

* Added manifest/result format version fields. This means that the payload
  version can be stepped without stepping the outer format version, thereby
  making it possible for older ccache versions to, for instance, recompress
  the cache even though the payload format has changed.

Other improvements:

* Added generic Reader and Writer interfaces to decouple code from using FILE*
  directly.
* Refactored checksum handling into ChecksummingReader and
  ChecksummingWriter.
--- diff --git a/misc/ccache.magic b/misc/ccache.magic index ea5a143d9..ad08262f7 100644 --- a/misc/ccache.magic +++ b/misc/ccache.magic @@ -1,7 +1,4 @@ -# ccache manifest -0 string cCmF ccache manifest ->4 ubyte x \b, version %d - -# ccache result -0 string cCrS ccache result ->4 ubyte x \b, version %d +0 beshort 0xCCAC ccache entry +>2 byte x \b, format version %d +>3 byte 0 \b, result +>3 byte 1 \b, manifest diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e480cf8c5..1fb096957 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,8 +2,6 @@ set( source_files Args.cpp AtomicFile.cpp - CacheEntryReader.cpp - CacheEntryWriter.cpp Config.cpp Context.cpp Depfile.cpp diff --git a/src/CacheEntryReader.cpp b/src/CacheEntryReader.cpp deleted file mode 100644 index 79271c7c5..000000000 --- a/src/CacheEntryReader.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#include "CacheEntryReader.hpp" - -#include "fmtmacros.hpp" - -#include -#include - -#include "third_party/fmt/core.h" - -CacheEntryReader::CacheEntryReader(FILE* stream, - const uint8_t* expected_magic, - uint8_t expected_version) -{ - uint8_t header_bytes[15]; - if (fread(header_bytes, sizeof(header_bytes), 1, stream) != 1) { - throw core::Error("Error reading header"); - } - - memcpy(m_magic, header_bytes, sizeof(m_magic)); - m_version = header_bytes[4]; - m_compression_type = compression::type_from_int(header_bytes[5]); - m_compression_level = header_bytes[6]; - Util::big_endian_to_int(header_bytes + 7, m_content_size); - - if (memcmp(m_magic, expected_magic, sizeof(m_magic)) != 0) { - throw core::Error("Bad magic value 0x{:02x}{:02x}{:02x}{:02x}", - m_magic[0], - m_magic[1], - m_magic[2], - m_magic[3]); - } - if (m_version != expected_version) { - throw core::Error( - "Unknown version (actual {}, expected {})", m_version, expected_version); - } - - m_checksum.update(header_bytes, sizeof(header_bytes)); - m_decompressor = - compression::Decompressor::create_from_type(m_compression_type, stream); -} - -void -CacheEntryReader::dump_header(FILE* dump_stream) -{ - PRINT(dump_stream, "Magic: {:.4}\n", m_magic); - PRINT(dump_stream, "Version: {}\n", m_version); - PRINT(dump_stream, - "Compression type: {}\n", - compression::type_to_string(m_compression_type)); - PRINT(dump_stream, "Compression level: {}\n", m_compression_level); - PRINT(dump_stream, "Content size: {}\n", m_content_size); -} - -void -CacheEntryReader::read(void* data, size_t count) -{ - m_decompressor->read(data, count); - m_checksum.update(data, count); -} - -void -CacheEntryReader::finalize() -{ - uint64_t actual_digest = m_checksum.digest(); - - uint8_t buffer[8]; - read(buffer, 
sizeof(buffer)); - uint64_t expected_digest; - Util::big_endian_to_int(buffer, expected_digest); - - if (actual_digest != expected_digest) { - throw core::Error( - "Incorrect checksum (actual 0x{:016x}, expected 0x{:016x})", - actual_digest, - expected_digest); - } - - m_decompressor->finalize(); -} diff --git a/src/CacheEntryReader.hpp b/src/CacheEntryReader.hpp deleted file mode 100644 index 75806b1dc..000000000 --- a/src/CacheEntryReader.hpp +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#pragma once - -#include "Util.hpp" - -#include -#include - -#include -#include -#include - -// This class knows how to read a cache entry with a common header and a -// payload part that is different depending on the cache entry type (result or -// manifest). -class CacheEntryReader -{ -public: - // Constructor. - // - // Parameters: - // - stream: Stream to read header and payload from. - // - expected_magic: Expected file format magic (first four bytes of the - // file). - // - expected_version: Expected file format version. 
- CacheEntryReader(FILE* stream, - const uint8_t* expected_magic, - uint8_t expected_version); - - // Dump header information in text format. - // - // Parameters: - // - dump_stream: Stream to write to. - void dump_header(FILE* dump_stream); - - // Read data into a buffer from the payload. - // - // Parameters: - // - data: Buffer to write data to. - // - count: How many bytes to write. - // - // Throws Error on failure. - void read(void* data, size_t count); - - // Read an unsigned integer from the payload. - // - // Parameters: - // - value: Variable to write to. - // - // Throws Error on failure. - template void read(T& value); - - // Close for reading. - // - // This method potentially verifies the end state after reading the cache - // entry and throws Error if any integrity issues are found. - void finalize(); - - // Get size of the payload, - uint64_t payload_size() const; - - // Get content magic. - const uint8_t* magic() const; - - // Get content version. - uint8_t version() const; - - // Get compression type. - compression::Type compression_type() const; - - // Get compression level. - int8_t compression_level() const; - - // Get size of the content (header + payload + checksum). 
- uint64_t content_size() const; - -private: - std::unique_ptr m_decompressor; - util::XXH3_64 m_checksum; - uint8_t m_magic[4]; - uint8_t m_version; - compression::Type m_compression_type; - int8_t m_compression_level; - uint64_t m_content_size; -}; - -template -inline void -CacheEntryReader::read(T& value) -{ - uint8_t buffer[sizeof(T)]; - read(buffer, sizeof(T)); - Util::big_endian_to_int(buffer, value); -} - -inline const uint8_t* -CacheEntryReader::magic() const -{ - return m_magic; -} - -inline uint8_t -CacheEntryReader::version() const -{ - return m_version; -} - -inline compression::Type -CacheEntryReader::compression_type() const -{ - return m_compression_type; -} - -inline int8_t -CacheEntryReader::compression_level() const -{ - return m_compression_level; -} - -inline uint64_t -CacheEntryReader::payload_size() const -{ - return m_content_size - 15 - 8; -} - -inline uint64_t -CacheEntryReader::content_size() const -{ - return m_content_size; -} diff --git a/src/CacheEntryWriter.cpp b/src/CacheEntryWriter.cpp deleted file mode 100644 index e04312029..000000000 --- a/src/CacheEntryWriter.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#include "CacheEntryWriter.hpp" - -#include - -CacheEntryWriter::CacheEntryWriter(FILE* stream, - const uint8_t* magic, - uint8_t version, - compression::Type compression_type, - int8_t compression_level, - uint64_t payload_size) - // clang-format off - : m_compressor(compression::Compressor::create_from_type( - compression_type, stream, compression_level)) -// clang-format on -{ - uint8_t header_bytes[15]; - memcpy(header_bytes, magic, 4); - header_bytes[4] = version; - header_bytes[5] = static_cast(compression_type); - header_bytes[6] = m_compressor->actual_compression_level(); - uint64_t content_size = 15 + payload_size + 8; - Util::int_to_big_endian(content_size, header_bytes + 7); - if (fwrite(header_bytes, sizeof(header_bytes), 1, stream) != 1) { - throw core::Error("Failed to write cache entry header"); - } - m_checksum.update(header_bytes, sizeof(header_bytes)); -} - -void -CacheEntryWriter::write(const void* data, size_t count) -{ - m_compressor->write(data, count); - m_checksum.update(data, count); -} - -void -CacheEntryWriter::finalize() -{ - uint8_t buffer[8]; - Util::int_to_big_endian(m_checksum.digest(), buffer); - m_compressor->write(buffer, sizeof(buffer)); - m_compressor->finalize(); -} diff --git a/src/CacheEntryWriter.hpp b/src/CacheEntryWriter.hpp deleted file mode 100644 index 2f3ae82b5..000000000 --- a/src/CacheEntryWriter.hpp +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. 
-// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#pragma once - -#include "Util.hpp" - -#include -#include - -#include -#include -#include - -// This class knows how to write a cache entry with a common header and a -// payload part that is different depending on the cache entry type (result or -// manifest). -class CacheEntryWriter -{ -public: - // Constructor. - // - // Parameters: - // - stream: Stream to write header + payload to. - // - magic: File format magic (first four bytes of the file). - // - version: File format version. - // - compression_type: Compression type to use. - // - compression_level: Compression level to use. - // - payload_size: Payload size. - CacheEntryWriter(FILE* stream, - const uint8_t* magic, - uint8_t version, - compression::Type compression_type, - int8_t compression_level, - uint64_t payload_size); - - // Write data to the payload from a buffer. - // - // Parameters: - // - data: Data to write. - // - count: Size of data to write. - // - // Throws Error on failure. - void write(const void* data, size_t count); - - // Write an unsigned integer to the payload. - // - // Parameters: - // - value: Value to write. - // - // Throws Error on failure. - template void write(T value); - - // Close for writing. 
- // - // This method potentially verifies the end state after writing the cache - // entry and throws Error if any integrity issues are found. - void finalize(); - -private: - std::unique_ptr m_compressor; - util::XXH3_64 m_checksum; -}; - -template -inline void -CacheEntryWriter::write(T value) -{ - uint8_t buffer[sizeof(T)]; - Util::int_to_big_endian(value, buffer); - write(buffer, sizeof(T)); -} diff --git a/src/Manifest.cpp b/src/Manifest.cpp index a9ab17b18..09d7683a0 100644 --- a/src/Manifest.cpp +++ b/src/Manifest.cpp @@ -19,8 +19,6 @@ #include "Manifest.hpp" #include "AtomicFile.hpp" -#include "CacheEntryReader.hpp" -#include "CacheEntryWriter.hpp" #include "Config.hpp" #include "Context.hpp" #include "Digest.hpp" @@ -30,6 +28,11 @@ #include "fmtmacros.hpp" #include "hashutil.hpp" +#include +#include +#include +#include +#include #include #include @@ -40,18 +43,8 @@ // // Integers are big-endian. // -// ::=
::= -// -// ::= 4 bytes ("cCrS") -// ::= uint8_t -// ::= | -// ::= 0 (uint8_t) -// ::= 1 (uint8_t) -// ::= int8_t -// ::= uint64_t ; size of file if stored uncompressed -// ::= ; body is potentially -// ; compressed +// ::= +// ::= uint8_t // ::= * // ::= uint32_t // ::= @@ -67,52 +60,15 @@ // ::= int64_t ; status change time // ::= * // ::= uint32_t -// ::= * +// ::= * // ::= uint32_t // ::= uint32_t -// ::= Digest::size() bytes -// ::= -// ::= uint64_t ; XXH3 of content bytes -// -// Sketch of concrete layout: - -// 4 bytes -// 1 byte -// 1 byte -// 1 byte -// 8 bytes -// --- [potentially compressed from here] ------------------------------------- -// 4 bytes -// 2 bytes -// path_len bytes -// ... -// ---------------------------------------------------------------------------- -// 4 bytes -// 4 bytes -// Digest::size() bytes -// 8 bytes -// 8 bytes -// 8 bytes -// ... -// ---------------------------------------------------------------------------- -// 4 bytes -// 4 bytes -// 4 bytes -// ... -// Digest::size() bytes -// ... -// checksum 8 bytes -// -// -// Version history -// =============== -// -// 1: Introduced in ccache 3.0. (Files are always compressed with gzip.) -// 2: Introduced in ccache 4.0. +// ::= Digest::size() bytes using nonstd::nullopt; using nonstd::optional; +const uint8_t k_manifest_format_version = 0; const uint32_t k_max_manifest_entries = 100; const uint32_t k_max_manifest_file_info_entries = 10000; @@ -150,9 +106,9 @@ template<> struct hash operator()(const FileInfo& file_info) const { static_assert(sizeof(FileInfo) == 48, "unexpected size"); // No padding. 
- util::XXH3_64 checksum; - checksum.update(&file_info, sizeof(file_info)); - return checksum.digest(); + util::XXH3_64 hash; + hash.update(&file_info, sizeof(file_info)); + return hash.digest(); } }; @@ -307,49 +263,49 @@ read_manifest(const std::string& path, FILE* dump_stream = nullptr) file_stream = file.get(); } - CacheEntryReader reader(file_stream, Manifest::k_magic, Manifest::k_version); + core::FileReader file_reader(file_stream); + core::CacheEntryReader reader(file_reader); if (dump_stream) { - reader.dump_header(dump_stream); + reader.header().dump(dump_stream); } - auto mf = std::make_unique(); + const auto format_ver = reader.read_int(); + if (format_ver != k_manifest_format_version) { + throw core::Error("Unknown manifest format version: {}", format_ver); + } - uint32_t entry_count; - reader.read(entry_count); - for (uint32_t i = 0; i < entry_count; ++i) { - mf->files.emplace_back(); - auto& entry = mf->files.back(); + if (dump_stream) { + PRINT(dump_stream, "Manifest format version: {}\n", format_ver); + } + + auto mf = std::make_unique(); - uint16_t length; - reader.read(length); - entry.assign(length, 0); - reader.read(&entry[0], length); + const auto file_count = reader.read_int(); + for (uint32_t i = 0; i < file_count; ++i) { + mf->files.push_back(reader.read_str(reader.read_int())); } - reader.read(entry_count); - for (uint32_t i = 0; i < entry_count; ++i) { + const auto file_info_count = reader.read_int(); + for (uint32_t i = 0; i < file_info_count; ++i) { mf->file_infos.emplace_back(); auto& entry = mf->file_infos.back(); - reader.read(entry.index); + reader.read_int(entry.index); reader.read(entry.digest.bytes(), Digest::size()); - reader.read(entry.fsize); - reader.read(entry.mtime); - reader.read(entry.ctime); + reader.read_int(entry.fsize); + reader.read_int(entry.mtime); + reader.read_int(entry.ctime); } - reader.read(entry_count); - for (uint32_t i = 0; i < entry_count; ++i) { + const auto result_count = reader.read_int(); + for 
(uint32_t i = 0; i < result_count; ++i) { mf->results.emplace_back(); auto& entry = mf->results.back(); - uint32_t file_info_count; - reader.read(file_info_count); - for (uint32_t j = 0; j < file_info_count; ++j) { - uint32_t file_info_index; - reader.read(file_info_index); - entry.file_info_indexes.push_back(file_info_index); + const auto file_info_index_count = reader.read_int(); + for (uint32_t j = 0; j < file_info_index_count; ++j) { + entry.file_info_indexes.push_back(reader.read_int()); } reader.read(entry.key.bytes(), Digest::size()); } @@ -364,6 +320,7 @@ write_manifest(const Config& config, const ManifestData& mf) { uint64_t payload_size = 0; + payload_size += 1; // format_ver payload_size += 4; // n_files for (const auto& file : mf.files) { payload_size += 2 + file.length(); @@ -378,32 +335,37 @@ write_manifest(const Config& config, } AtomicFile atomic_manifest_file(path, AtomicFile::Mode::binary); - CacheEntryWriter writer(atomic_manifest_file.stream(), - Manifest::k_magic, - Manifest::k_version, - compression::type_from_config(config), - compression::level_from_config(config), - payload_size); - writer.write(mf.files.size()); + core::FileWriter file_writer(atomic_manifest_file.stream()); + core::CacheEntryHeader header(core::CacheEntryType::manifest, + compression::type_from_config(config), + compression::level_from_config(config), + time(nullptr), + CCACHE_VERSION, + ""); + header.set_entry_size_from_payload_size(payload_size); + + core::CacheEntryWriter writer(file_writer, header); + writer.write_int(k_manifest_format_version); + writer.write_int(mf.files.size()); for (const auto& file : mf.files) { - writer.write(file.length()); - writer.write(file.data(), file.length()); + writer.write_int(file.length()); + writer.write_str(file); } - writer.write(mf.file_infos.size()); + writer.write_int(mf.file_infos.size()); for (const auto& file_info : mf.file_infos) { - writer.write(file_info.index); + writer.write_int(file_info.index); 
writer.write(file_info.digest.bytes(), Digest::size()); - writer.write(file_info.fsize); - writer.write(file_info.mtime); - writer.write(file_info.ctime); + writer.write_int(file_info.fsize); + writer.write_int(file_info.mtime); + writer.write_int(file_info.ctime); } - writer.write(mf.results.size()); + writer.write_int(mf.results.size()); for (const auto& result : mf.results) { - writer.write(result.file_info_indexes.size()); + writer.write_int(result.file_info_indexes.size()); for (auto index : result.file_info_indexes) { - writer.write(index); + writer.write_int(index); } writer.write(result.key.bytes(), Digest::size()); } diff --git a/src/Result.cpp b/src/Result.cpp index 202ce820c..e79334f34 100644 --- a/src/Result.cpp +++ b/src/Result.cpp @@ -19,8 +19,6 @@ #include "Result.hpp" #include "AtomicFile.hpp" -#include "CacheEntryReader.hpp" -#include "CacheEntryWriter.hpp" #include "Config.hpp" #include "Context.hpp" #include "Fd.hpp" @@ -30,6 +28,11 @@ #include "Util.hpp" #include "fmtmacros.hpp" +#include +#include +#include +#include +#include #include #include #include @@ -50,17 +53,8 @@ // // Integers are big-endian. // -// ::=
-//
::= -// -// ::= 4 bytes ("cCrS") -// ::= uint8_t -// ::= | -// ::= 0 (uint8_t) -// ::= 1 (uint8_t) -// ::= int8_t -// ::= uint64_t ; size of file if stored uncompressed -// ::= * ; potentially compressed +// ::= * +// ::= uint8_t // ::= uint8_t // ::= | // ::= @@ -74,32 +68,6 @@ // ::= uint64_t // ::= // ::= uint64_t ; XXH3 of content bytes -// -// Sketch of concrete layout: -// -// 4 bytes -// 1 byte -// 1 byte -// 1 byte -// 8 bytes -// --- [potentially compressed from here] ------------------------------------- -// 1 byte -// 1 byte -// 1 byte -// 8 bytes -// data_len bytes -// ... -// 1 byte -// 1 byte -// key_len bytes -// ... -// checksum 8 bytes -// -// -// Version history -// =============== -// -// 1: Introduced in ccache 4.0. using nonstd::nullopt; using nonstd::optional; @@ -107,6 +75,8 @@ using nonstd::string_view; namespace { +const uint8_t k_result_format_version = 0; + // File data stored inside the result file. const uint8_t k_embedded_file_marker = 0; @@ -252,12 +222,18 @@ Reader::read_result(Consumer& consumer) file_stream = file.get(); } - CacheEntryReader cache_entry_reader(file_stream, k_magic, k_version); + core::FileReader file_reader(file_stream); + core::CacheEntryReader cache_entry_reader(file_reader); + + const auto result_format_version = cache_entry_reader.read_int(); + if (result_format_version != k_result_format_version) { + throw core::Error("Unknown result format version: {}", + result_format_version); + } - consumer.on_header(cache_entry_reader); + consumer.on_header(cache_entry_reader, result_format_version); - uint8_t n_entries; - cache_entry_reader.read(n_entries); + const auto n_entries = cache_entry_reader.read_int(); uint32_t i; for (i = 0; i < n_entries; ++i) { @@ -273,12 +249,11 @@ Reader::read_result(Consumer& consumer) } void -Reader::read_entry(CacheEntryReader& cache_entry_reader, +Reader::read_entry(core::CacheEntryReader& cache_entry_reader, uint32_t entry_number, Reader::Consumer& consumer) { - uint8_t marker; - 
cache_entry_reader.read(marker); + const auto marker = cache_entry_reader.read_int(); switch (marker) { case k_embedded_file_marker: @@ -289,12 +264,9 @@ Reader::read_entry(CacheEntryReader& cache_entry_reader, throw core::Error("Unknown entry type: {}", marker); } - UnderlyingFileTypeInt type; - cache_entry_reader.read(type); - FileType file_type = FileType(type); - - uint64_t file_len; - cache_entry_reader.read(file_len); + const auto type = cache_entry_reader.read_int(); + const auto file_type = FileType(type); + const auto file_len = cache_entry_reader.read_int(); if (marker == k_embedded_file_marker) { consumer.on_entry_start(entry_number, file_type, file_len, nullopt); @@ -365,14 +337,19 @@ Writer::do_finalize() } AtomicFile atomic_result_file(m_result_path, AtomicFile::Mode::binary); - CacheEntryWriter writer(atomic_result_file.stream(), - k_magic, - k_version, - compression::type_from_config(m_ctx.config), - compression::level_from_config(m_ctx.config), - payload_size); + core::CacheEntryHeader header(core::CacheEntryType::result, + compression::type_from_config(m_ctx.config), + compression::level_from_config(m_ctx.config), + time(nullptr), + CCACHE_VERSION, + ""); + header.set_entry_size_from_payload_size(payload_size); + + core::FileWriter file_writer(atomic_result_file.stream()); + core::CacheEntryWriter writer(file_writer, header); - writer.write(m_entries_to_write.size()); + writer.write_int(k_result_format_version); + writer.write_int(m_entries_to_write.size()); uint32_t entry_number = 0; for (const auto& pair : m_entries_to_write) { @@ -390,10 +367,10 @@ Writer::do_finalize() file_size, path); - writer.write(store_raw ? k_raw_file_marker - : k_embedded_file_marker); - writer.write(UnderlyingFileTypeInt(file_type)); - writer.write(file_size); + writer.write_int(store_raw ? 
k_raw_file_marker + : k_embedded_file_marker); + writer.write_int(UnderlyingFileTypeInt(file_type)); + writer.write_int(file_size); if (store_raw) { file_size_and_count_diff += write_raw_file_entry(path, entry_number); @@ -411,7 +388,7 @@ Writer::do_finalize() } void -Result::Writer::write_embedded_file_entry(CacheEntryWriter& writer, +Result::Writer::write_embedded_file_entry(core::CacheEntryWriter& writer, const std::string& path, uint64_t file_size) { diff --git a/src/Result.hpp b/src/Result.hpp index 994322dd4..f36cbb6cd 100644 --- a/src/Result.hpp +++ b/src/Result.hpp @@ -26,8 +26,13 @@ #include #include +namespace core { + class CacheEntryReader; class CacheEntryWriter; + +} // namespace core + class Context; namespace Result { @@ -98,7 +103,8 @@ public: public: virtual ~Consumer() = default; - virtual void on_header(CacheEntryReader& cache_entry_reader) = 0; + virtual void on_header(core::CacheEntryReader& cache_entry_reader, + uint8_t result_format_version) = 0; virtual void on_entry_start(uint32_t entry_number, FileType file_type, uint64_t file_len, @@ -114,7 +120,7 @@ private: const std::string m_result_path; bool read_result(Consumer& consumer); - void read_entry(CacheEntryReader& cache_entry_reader, + void read_entry(core::CacheEntryReader& cache_entry_reader, uint32_t entry_number, Reader::Consumer& consumer); }; @@ -137,7 +143,7 @@ private: std::vector> m_entries_to_write; FileSizeAndCountDiff do_finalize(); - static void write_embedded_file_entry(CacheEntryWriter& writer, + static void write_embedded_file_entry(core::CacheEntryWriter& writer, const std::string& path, uint64_t file_size); FileSizeAndCountDiff write_raw_file_entry(const std::string& path, diff --git a/src/ResultDumper.cpp b/src/ResultDumper.cpp index 1e3bac40c..220ca2228 100644 --- a/src/ResultDumper.cpp +++ b/src/ResultDumper.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See 
doc/AUTHORS.adoc for a complete list of contributors. // @@ -18,11 +18,12 @@ #include "ResultDumper.hpp" -#include "CacheEntryReader.hpp" #include "Context.hpp" #include "Logging.hpp" #include "fmtmacros.hpp" +#include + using nonstd::optional; ResultDumper::ResultDumper(FILE* stream) : m_stream(stream) @@ -30,9 +31,11 @@ ResultDumper::ResultDumper(FILE* stream) : m_stream(stream) } void -ResultDumper::on_header(CacheEntryReader& cache_entry_reader) +ResultDumper::on_header(core::CacheEntryReader& cache_entry_reader, + const uint8_t result_format_version) { - cache_entry_reader.dump_header(m_stream); + cache_entry_reader.header().dump(m_stream); + PRINT(m_stream, "Result format version: {}\n", result_format_version); } void diff --git a/src/ResultDumper.hpp b/src/ResultDumper.hpp index c48d2ab59..0274efaf4 100644 --- a/src/ResultDumper.hpp +++ b/src/ResultDumper.hpp @@ -29,7 +29,8 @@ class ResultDumper : public Result::Reader::Consumer public: ResultDumper(FILE* stream); - void on_header(CacheEntryReader& cache_entry_reader) override; + void on_header(core::CacheEntryReader& cache_entry_reader, + uint8_t result_format_version) override; void on_entry_start(uint32_t entry_number, Result::FileType file_type, uint64_t file_len, diff --git a/src/ResultExtractor.cpp b/src/ResultExtractor.cpp index 077f32442..5dcef476e 100644 --- a/src/ResultExtractor.cpp +++ b/src/ResultExtractor.cpp @@ -34,7 +34,8 @@ ResultExtractor::ResultExtractor(const std::string& directory) } void -ResultExtractor::on_header(CacheEntryReader& /*cache_entry_reader*/) +ResultExtractor::on_header(core::CacheEntryReader& /*cache_entry_reader*/, + const uint8_t /*result_format_version*/) { } diff --git a/src/ResultExtractor.hpp b/src/ResultExtractor.hpp index 0e801ff4a..4ee2a3a9b 100644 --- a/src/ResultExtractor.hpp +++ b/src/ResultExtractor.hpp @@ -29,7 +29,8 @@ class ResultExtractor : public Result::Reader::Consumer public: ResultExtractor(const std::string& directory); - void 
on_header(CacheEntryReader& cache_entry_reader) override; + void on_header(core::CacheEntryReader& cache_entry_reader, + uint8_t result_format_version) override; void on_entry_start(uint32_t entry_number, Result::FileType file_type, uint64_t file_len, diff --git a/src/ResultRetriever.cpp b/src/ResultRetriever.cpp index 06dab8686..2e0bf4740 100644 --- a/src/ResultRetriever.cpp +++ b/src/ResultRetriever.cpp @@ -42,7 +42,8 @@ ResultRetriever::ResultRetriever(Context& ctx, bool rewrite_dependency_target) } void -ResultRetriever::on_header(CacheEntryReader& /*cache_entry_reader*/) +ResultRetriever::on_header(core::CacheEntryReader& /*cache_entry_reader*/, + const uint8_t /*result_format_version*/) { } diff --git a/src/ResultRetriever.hpp b/src/ResultRetriever.hpp index 4794afdef..3924965ab 100644 --- a/src/ResultRetriever.hpp +++ b/src/ResultRetriever.hpp @@ -29,7 +29,8 @@ class ResultRetriever : public Result::Reader::Consumer public: ResultRetriever(Context& ctx, bool rewrite_dependency_target); - void on_header(CacheEntryReader& cache_entry_reader) override; + void on_header(core::CacheEntryReader& cache_entry_reader, + uint8_t result_format_version) override; void on_entry_start(uint32_t entry_number, Result::FileType file_type, uint64_t file_len, diff --git a/src/ccache.cpp b/src/ccache.cpp index f23f50ef7..2d8197f7b 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -88,7 +88,7 @@ using nonstd::string_view; // different for the same input in a new ccache version, we can just change // this string. A typical example would be if the format of one of the files // stored in the cache changes in a backwards-incompatible way. 
-const char HASH_PREFIX[] = "3"; +const char HASH_PREFIX[] = "4"; namespace { diff --git a/src/compression/Compressor.cpp b/src/compression/Compressor.cpp index cd695beea..dbcc665cf 100644 --- a/src/compression/Compressor.cpp +++ b/src/compression/Compressor.cpp @@ -22,21 +22,23 @@ #include "ZstdCompressor.hpp" #include "assertions.hpp" +#include + #include namespace compression { std::unique_ptr Compressor::create_from_type(const Type type, - FILE* const stream, + core::Writer& writer, const int8_t compression_level) { switch (type) { case compression::Type::none: - return std::make_unique(stream); + return std::make_unique(writer); case compression::Type::zstd: - return std::make_unique(stream, compression_level); + return std::make_unique(writer, compression_level); } ASSERT(false); diff --git a/src/compression/Compressor.hpp b/src/compression/Compressor.hpp index 9c8e56393..deebbd997 100644 --- a/src/compression/Compressor.hpp +++ b/src/compression/Compressor.hpp @@ -19,52 +19,28 @@ #pragma once #include +#include #include -#include #include +namespace core { + +class Writer; + +} + namespace compression { -class Compressor +class Compressor : public core::Writer { public: virtual ~Compressor() = default; - // Create a compressor for the specified type. - // - // Parameters: - // - type: The type. - // - stream: The stream to write to. - // - compression_level: Desired compression level. static std::unique_ptr - create_from_type(Type type, FILE* stream, int8_t compression_level); + create_from_type(Type type, core::Writer& writer, int8_t compression_level); - // Get the actual compression level used for the compressed stream. virtual int8_t actual_compression_level() const = 0; - - // Write data from a buffer to the compressed stream. - // - // Parameters: - // - data: Data to write. - // - count: Size of data to write. - // - // Throws Error on failure. 
- virtual void write(const void* data, size_t count) = 0; - - // Write an unsigned integer to the compressed stream. - // - // Parameters: - // - value: Value to write. - // - // Throws Error on failure. - template void write(T value); - - // Finalize compression. - // - // This method checks that the end state of the compressed stream is correct - // and throws Error if not. - virtual void finalize() = 0; }; } // namespace compression diff --git a/src/compression/Decompressor.cpp b/src/compression/Decompressor.cpp index 3a76b1643..6bfa713df 100644 --- a/src/compression/Decompressor.cpp +++ b/src/compression/Decompressor.cpp @@ -25,14 +25,14 @@ namespace compression { std::unique_ptr -Decompressor::create_from_type(Type type, FILE* stream) +Decompressor::create_from_type(Type type, core::Reader& reader) { switch (type) { case compression::Type::none: - return std::make_unique(stream); + return std::make_unique(reader); case compression::Type::zstd: - return std::make_unique(stream); + return std::make_unique(reader); } ASSERT(false); diff --git a/src/compression/Decompressor.hpp b/src/compression/Decompressor.hpp index 8d6b173b0..2223d59e0 100644 --- a/src/compression/Decompressor.hpp +++ b/src/compression/Decompressor.hpp @@ -19,33 +19,20 @@ #pragma once #include +#include -#include #include namespace compression { -class Decompressor +class Decompressor : public core::Reader { public: virtual ~Decompressor() = default; // Create a decompressor for the specified type. - // - // Parameters: - // - type: The type. - // - stream: The stream to read from. static std::unique_ptr create_from_type(Type type, - FILE* stream); - - // Read data into a buffer from the compressed stream. - // - // Parameters: - // - data: Buffer to write decompressed data to. - // - count: How many bytes to write. - // - // Throws Error on failure. - virtual void read(void* data, size_t count) = 0; + core::Reader& reader); // Finalize decompression. 
// diff --git a/src/compression/NullCompressor.cpp b/src/compression/NullCompressor.cpp index 4d5ee67a5..a2cb76517 100644 --- a/src/compression/NullCompressor.cpp +++ b/src/compression/NullCompressor.cpp @@ -22,7 +22,7 @@ namespace compression { -NullCompressor::NullCompressor(FILE* const stream) : m_stream(stream) +NullCompressor::NullCompressor(core::Writer& writer) : m_writer(writer) { } @@ -35,17 +35,13 @@ NullCompressor::actual_compression_level() const void NullCompressor::write(const void* const data, const size_t count) { - if (fwrite(data, 1, count, m_stream) != count) { - throw core::Error("failed to write to uncompressed stream"); - } + m_writer.write(data, count); } void NullCompressor::finalize() { - if (fflush(m_stream) != 0) { - throw core::Error("failed to finalize uncompressed stream"); - } + m_writer.finalize(); } } // namespace compression diff --git a/src/compression/NullCompressor.hpp b/src/compression/NullCompressor.hpp index 2f26c7b03..154b568c7 100644 --- a/src/compression/NullCompressor.hpp +++ b/src/compression/NullCompressor.hpp @@ -22,24 +22,20 @@ #include -#include - namespace compression { // A compressor of an uncompressed stream. class NullCompressor : public Compressor, NonCopyable { public: - // Parameters: - // - stream: The file to write data to. 
- explicit NullCompressor(FILE* stream); + explicit NullCompressor(core::Writer& writer); int8_t actual_compression_level() const override; void write(const void* data, size_t count) override; void finalize() override; private: - FILE* m_stream; + core::Writer& m_writer; }; } // namespace compression diff --git a/src/compression/NullDecompressor.cpp b/src/compression/NullDecompressor.cpp index f25da6354..81693e4c2 100644 --- a/src/compression/NullDecompressor.cpp +++ b/src/compression/NullDecompressor.cpp @@ -22,23 +22,28 @@ namespace compression { -NullDecompressor::NullDecompressor(FILE* const stream) : m_stream(stream) +NullDecompressor::NullDecompressor(core::Reader& reader) : m_reader(reader) { } -void +size_t NullDecompressor::read(void* const data, const size_t count) { - if (fread(data, count, 1, m_stream) != 1) { - throw core::Error("failed to read from uncompressed stream"); - } + return m_reader.read(data, count); } void NullDecompressor::finalize() { - if (fgetc(m_stream) != EOF) { - throw core::Error("garbage data at end of uncompressed stream"); + bool eof; + try { + m_reader.read_int(); + eof = false; + } catch (core::Error&) { + eof = true; + } + if (!eof) { + throw core::Error("Garbage data at end of uncompressed stream"); } } diff --git a/src/compression/NullDecompressor.hpp b/src/compression/NullDecompressor.hpp index 722319d86..c2c8f5deb 100644 --- a/src/compression/NullDecompressor.hpp +++ b/src/compression/NullDecompressor.hpp @@ -22,23 +22,19 @@ #include -#include - namespace compression { // A decompressor of an uncompressed stream. class NullDecompressor : public Decompressor, NonCopyable { public: - // Parameters: - // - stream: The file to read data from. 
- explicit NullDecompressor(FILE* stream); + explicit NullDecompressor(core::Reader& reader); - void read(void* data, size_t count) override; + size_t read(void* data, size_t count) override; void finalize() override; private: - FILE* m_stream; + core::Reader& m_reader; }; } // namespace compression diff --git a/src/compression/ZstdCompressor.cpp b/src/compression/ZstdCompressor.cpp index fffefd1e8..73abe3b07 100644 --- a/src/compression/ZstdCompressor.cpp +++ b/src/compression/ZstdCompressor.cpp @@ -29,8 +29,8 @@ namespace compression { -ZstdCompressor::ZstdCompressor(FILE* const stream, int8_t compression_level) - : m_stream(stream), +ZstdCompressor::ZstdCompressor(core::Writer& writer, int8_t compression_level) + : m_writer(writer), m_zstd_stream(ZSTD_createCStream()), m_zstd_in(std::make_unique()), m_zstd_out(std::make_unique()) @@ -94,9 +94,8 @@ ZstdCompressor::write(const void* const data, const size_t count) ret = ZSTD_compressStream(m_zstd_stream, m_zstd_out.get(), m_zstd_in.get()); ASSERT(!(ZSTD_isError(ret))); const size_t compressed_bytes = m_zstd_out->pos; - if (fwrite(buffer, 1, compressed_bytes, m_stream) != compressed_bytes - || ferror(m_stream)) { - throw core::Error("failed to write to zstd output stream "); + if (compressed_bytes > 0) { + m_writer.write(buffer, compressed_bytes); } } ret = flush; @@ -107,9 +106,8 @@ ZstdCompressor::write(const void* const data, const size_t count) m_zstd_out->pos = 0; ret = ZSTD_endStream(m_zstd_stream, m_zstd_out.get()); const size_t compressed_bytes = m_zstd_out->pos; - if (fwrite(buffer, 1, compressed_bytes, m_stream) != compressed_bytes - || ferror(m_stream)) { - throw core::Error("failed to write to zstd output stream"); + if (compressed_bytes > 0) { + m_writer.write(buffer, compressed_bytes); } } } @@ -118,6 +116,7 @@ void ZstdCompressor::finalize() { write(nullptr, 0); + m_writer.finalize(); } } // namespace compression diff --git a/src/compression/ZstdCompressor.hpp b/src/compression/ZstdCompressor.hpp 
index 3a1c18e5e..efb6bfc08 100644 --- a/src/compression/ZstdCompressor.hpp +++ b/src/compression/ZstdCompressor.hpp @@ -35,10 +35,7 @@ namespace compression { class ZstdCompressor : public Compressor, NonCopyable { public: - // Parameters: - // - stream: The file to write data to. - // - compression_level: Desired compression level. - ZstdCompressor(FILE* stream, int8_t compression_level); + ZstdCompressor(core::Writer& writer, int8_t compression_level); ~ZstdCompressor() override; @@ -49,7 +46,7 @@ public: constexpr static uint8_t default_compression_level = 1; private: - FILE* m_stream; + core::Writer& m_writer; ZSTD_CCtx_s* m_zstd_stream; std::unique_ptr m_zstd_in; std::unique_ptr m_zstd_out; diff --git a/src/compression/ZstdDecompressor.cpp b/src/compression/ZstdDecompressor.cpp index 916a6e8fe..9ceab28ba 100644 --- a/src/compression/ZstdDecompressor.cpp +++ b/src/compression/ZstdDecompressor.cpp @@ -24,8 +24,8 @@ namespace compression { -ZstdDecompressor::ZstdDecompressor(FILE* const stream) - : m_stream(stream), +ZstdDecompressor::ZstdDecompressor(core::Reader& reader) + : m_reader(reader), m_input_size(0), m_input_consumed(0), m_zstd_stream(ZSTD_createDStream()), @@ -43,17 +43,14 @@ ZstdDecompressor::~ZstdDecompressor() ZSTD_freeDStream(m_zstd_stream); } -void +size_t ZstdDecompressor::read(void* const data, const size_t count) { size_t bytes_read = 0; while (bytes_read < count) { ASSERT(m_input_size >= m_input_consumed); if (m_input_size == m_input_consumed) { - m_input_size = fread(m_input_buffer, 1, sizeof(m_input_buffer), m_stream); - if (m_input_size == 0) { - throw core::Error("failed to read from zstd input stream"); - } + m_input_size = m_reader.read(m_input_buffer, sizeof(m_input_buffer)); m_input_consumed = 0; } @@ -67,7 +64,7 @@ ZstdDecompressor::read(void* const data, const size_t count) const size_t ret = ZSTD_decompressStream(m_zstd_stream, &m_zstd_out, &m_zstd_in); if (ZSTD_isError(ret)) { - throw core::Error("failed to read from zstd input 
stream"); + throw core::Error("Failed to read from zstd input stream"); } if (ret == 0) { m_reached_stream_end = true; @@ -76,13 +73,15 @@ ZstdDecompressor::read(void* const data, const size_t count) bytes_read += m_zstd_out.pos; m_input_consumed += m_zstd_in.pos; } + + return count; } void ZstdDecompressor::finalize() { if (!m_reached_stream_end) { - throw core::Error("garbage data at end of zstd input stream"); + throw core::Error("Garbage data at end of zstd input stream"); } } diff --git a/src/compression/ZstdDecompressor.hpp b/src/compression/ZstdDecompressor.hpp index aee51b6d2..0006ec407 100644 --- a/src/compression/ZstdDecompressor.hpp +++ b/src/compression/ZstdDecompressor.hpp @@ -30,17 +30,15 @@ namespace compression { class ZstdDecompressor : public Decompressor { public: - // Parameters: - // - stream: The file to read data from. - explicit ZstdDecompressor(FILE* stream); + explicit ZstdDecompressor(core::Reader& reader); ~ZstdDecompressor() override; - void read(void* data, size_t count) override; + size_t read(void* data, size_t count) override; void finalize() override; private: - FILE* m_stream; + core::Reader& m_reader; char m_input_buffer[CCACHE_READ_BUFFER_SIZE]; size_t m_input_size; size_t m_input_consumed; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 60d42973b..060881203 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,5 +1,8 @@ set( sources + ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryHeader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Statistics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/StatisticsCounters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/StatsLog.cpp diff --git a/src/core/CacheEntryHeader.cpp b/src/core/CacheEntryHeader.cpp new file mode 100644 index 000000000..e29ce801d --- /dev/null +++ b/src/core/CacheEntryHeader.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors +// +// See 
doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "CacheEntryHeader.hpp" + +#include + +const size_t k_static_header_fields_size = + sizeof(core::CacheEntryHeader::magic) + + sizeof(core::CacheEntryHeader::entry_format_version) + + sizeof(core::CacheEntryHeader::entry_type) + + sizeof(core::CacheEntryHeader::compression_type) + + sizeof(core::CacheEntryHeader::compression_level) + + sizeof(core::CacheEntryHeader::creation_time) + + sizeof(core::CacheEntryHeader::entry_size) + // ccache_version length field: + + 1 + // tag length field: + + 1; + +const size_t k_static_epilogue_fields_size = + sizeof(uint64_t) + sizeof(uint64_t); + +namespace core { + +CacheEntryHeader::CacheEntryHeader(const core::CacheEntryType entry_type_, + const compression::Type compression_type_, + const int8_t compression_level_, + const uint64_t creation_time_, + const std::string& ccache_version_, + const std::string& tag_, + const uint64_t entry_size_) + : magic(k_ccache_magic), + entry_format_version(k_entry_format_version), + entry_type(entry_type_), + compression_type(compression_type_), + compression_level(compression_level_), + creation_time(creation_time_), + ccache_version(ccache_version_), + tag(tag_), + entry_size(entry_size_) +{ +} + +uint64_t 
+CacheEntryHeader::payload_size() const +{ + return entry_size - non_payload_size(); +} + +void +CacheEntryHeader::set_entry_size_from_payload_size(const uint64_t payload_size) +{ + entry_size = non_payload_size() + payload_size; +} + +void +CacheEntryHeader::dump(FILE* const stream) const +{ + PRINT(stream, "Magic: {:04x}\n", magic); + PRINT(stream, "Entry format version: {}\n", entry_format_version); + PRINT(stream, "Entry type: {}\n", entry_type); + PRINT(stream, + "Compression type: {}\n", + compression::type_to_string(compression_type)); + PRINT(stream, "Compression level: {}\n", compression_level); + PRINT(stream, "Creation time: {}\n", creation_time); + PRINT(stream, "Ccache version: {}\n", ccache_version); + PRINT(stream, "Tag: {}\n", tag); + PRINT(stream, "Entry size: {}\n", entry_size); +} + +size_t +CacheEntryHeader::non_payload_size() const +{ + return k_static_header_fields_size + ccache_version.length() + tag.length() + + k_static_epilogue_fields_size; +} + +} // namespace core diff --git a/src/core/CacheEntryHeader.hpp b/src/core/CacheEntryHeader.hpp new file mode 100644 index 000000000..cbbb7b43e --- /dev/null +++ b/src/core/CacheEntryHeader.hpp @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +// Cache entry format +// ================== +// +// Integers are big-endian. +// +// <entry> ::= <header> <payload> <epilogue>
+// <header> ::= <magic> <format_ver> <entry_type> <compr_type> +// <compr_level> <creation_time> <ccache_ver> <tag> +// <entry_size>
+// <magic> ::= uint16_t (0xccac) +// <format_ver> ::= uint8_t +// <entry_type> ::= <result_entry> | <manifest_entry> +// <result_entry> ::= 0 (uint8_t) +// <manifest_entry> ::= 1 (uint8_t) +// <compr_type> ::= <compr_none> | <compr_zstd> +// <compr_none> ::= 0 (uint8_t) +// <compr_zstd> ::= 1 (uint8_t) +// <compr_level> ::= int8_t +// <creation_time> ::= uint64_t (Unix epoch time when entry was created) +// <ccache_ver> ::= string length (uint8_t) + string data +// <tag> ::= string length (uint8_t) + string data +// <entry_size> ::= uint64_t ; = size of file if stored uncompressed +// ; potentially compressed from here +// <payload> ::= depends on entry_type +// <epilogue> ::= <checksum_high> <checksum_low> +// <checksum_high> ::= uint64_t ; XXH3-128 (high bits) of entry bytes +// <checksum_low> ::= uint64_t ; XXH3-128 (low bits) of entry bytes + +namespace core { + +const uint16_t k_ccache_magic = 0xccac; +const uint16_t k_entry_format_version = 0; + +struct CacheEntryHeader +{ + CacheEntryHeader(core::CacheEntryType entry_type, + compression::Type compression_type, + int8_t compression_level, + uint64_t creation_time, + const std::string& ccache_version, + const std::string& tag, + uint64_t entry_size = 0); + + uint16_t magic; + uint8_t entry_format_version; + core::CacheEntryType entry_type; + compression::Type compression_type; + int8_t compression_level; + uint64_t creation_time; + std::string ccache_version; + std::string tag; + uint64_t entry_size; + + uint64_t payload_size() const; + void set_entry_size_from_payload_size(uint64_t payload_size); + void dump(FILE* stream) const; + +private: + size_t non_payload_size() const; +}; + +} // namespace core diff --git a/src/core/CacheEntryReader.cpp b/src/core/CacheEntryReader.cpp new file mode 100644 index 000000000..26ae366e2 --- /dev/null +++ b/src/core/CacheEntryReader.cpp @@ -0,0 +1,108 @@ +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "CacheEntryReader.hpp" + +#include + +namespace { + +core::CacheEntryType +cache_entry_type_from_int(const uint8_t entry_type) +{ + switch (entry_type) { + case 0: + return core::CacheEntryType::result; + break; + case 1: + return core::CacheEntryType::manifest; + break; + default: + throw core::Error("Unknown entry type: {}", entry_type); + } +} + +} // namespace + +namespace core { + +CacheEntryReader::CacheEntryReader(core::Reader& reader) + : m_checksumming_reader(reader) +{ + const auto magic = m_checksumming_reader.read_int(); + if (magic != core::k_ccache_magic) { + throw core::Error("Bad magic value: 0x{:04x}", magic); + } + + const auto entry_format_version = m_checksumming_reader.read_int(); + if (entry_format_version != core::k_entry_format_version) { + throw core::Error("Unknown entry format version: {}", entry_format_version); + } + + const auto entry_type = m_checksumming_reader.read_int(); + const auto compression_type = m_checksumming_reader.read_int(); + const auto compression_level = m_checksumming_reader.read_int(); + const auto creation_time = m_checksumming_reader.read_int(); + const auto ccache_version = + m_checksumming_reader.read_str(m_checksumming_reader.read_int()); + const auto tag = + m_checksumming_reader.read_str(m_checksumming_reader.read_int()); + const auto entry_size = m_checksumming_reader.read_int(); + + m_header = std::make_unique( + cache_entry_type_from_int(entry_type), + compression::type_from_int(compression_type), + compression_level, + 
creation_time, + ccache_version, + tag, + entry_size); + + m_decompressor = compression::Decompressor::create_from_type( + m_header->compression_type, reader); + m_checksumming_reader.set_reader(*m_decompressor); +} + +size_t +CacheEntryReader::read(void* const data, const size_t count) +{ + return m_checksumming_reader.read(data, count); +} + +void +CacheEntryReader::finalize() +{ + const util::XXH3_128::Digest actual = m_checksumming_reader.digest(); + util::XXH3_128::Digest expected; + m_decompressor->read(expected.bytes(), expected.size()); + + // actual == null_digest: Checksumming is not enabled now. + // expected == null_digest: Checksumming was not enabled when the entry was + // created. + const util::XXH3_128::Digest null_digest; + + if (actual != expected && actual != null_digest && expected != null_digest) { + throw core::Error("Incorrect checksum (actual {}, expected {})", + Util::format_base16(actual.bytes(), actual.size()), + Util::format_base16(expected.bytes(), expected.size())); + } + + m_decompressor->finalize(); +} + +} // namespace core diff --git a/src/core/CacheEntryReader.hpp b/src/core/CacheEntryReader.hpp new file mode 100644 index 000000000..a4e518191 --- /dev/null +++ b/src/core/CacheEntryReader.hpp @@ -0,0 +1,61 @@ +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include +#include +#include +#include + +namespace core { + +// This class knows how to read a cache entry with a format described in +// CacheEntryHeader. +class CacheEntryReader : public Reader +{ +public: + // Read cache entry data from `reader`. + CacheEntryReader(Reader& reader); + + size_t read(void* data, size_t count) override; + using Reader::read; + + // Close for reading. + // + // This method potentially verifies the end state after reading the cache + // entry and throws `core::Error` if any integrity issues are found. + void finalize(); + + const CacheEntryHeader& header() const; + +private: + ChecksummingReader m_checksumming_reader; + std::unique_ptr m_header; + util::XXH3_128 m_checksum; + std::unique_ptr m_decompressor; +}; + +inline const CacheEntryHeader& +CacheEntryReader::header() const +{ + return *m_header; +} + +} // namespace core diff --git a/src/core/CacheEntryWriter.cpp b/src/core/CacheEntryWriter.cpp new file mode 100644 index 000000000..b09b4c837 --- /dev/null +++ b/src/core/CacheEntryWriter.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "CacheEntryWriter.hpp" + +#include + +namespace core { + +CacheEntryWriter::CacheEntryWriter(core::Writer& writer, + const CacheEntryHeader& header) + : m_checksumming_writer(writer), + m_compressor(compression::Compressor::create_from_type( + header.compression_type, writer, header.compression_level)) +{ + m_checksumming_writer.write_int(header.magic); + m_checksumming_writer.write_int(header.entry_format_version); + m_checksumming_writer.write_int(static_cast(header.entry_type)); + m_checksumming_writer.write_int( + static_cast(header.compression_type)); + m_checksumming_writer.write_int(header.compression_level); + m_checksumming_writer.write_int(header.creation_time); + m_checksumming_writer.write_int(header.ccache_version.length()); + m_checksumming_writer.write_str(header.ccache_version); + m_checksumming_writer.write_int(header.tag.length()); + m_checksumming_writer.write_str(header.tag); + m_checksumming_writer.write_int(header.entry_size); + + m_checksumming_writer.set_writer(*m_compressor); +} + +void +CacheEntryWriter::write(const void* const data, const size_t count) +{ + m_checksumming_writer.write(data, count); +} + +void +CacheEntryWriter::finalize() +{ + const auto digest = m_checksumming_writer.digest(); + m_compressor->write(digest.bytes(), digest.size()); + m_compressor->finalize(); +} + +} // namespace core diff --git a/src/core/CacheEntryWriter.hpp b/src/core/CacheEntryWriter.hpp new file mode 100644 index 000000000..789eb0c85 --- /dev/null +++ b/src/core/CacheEntryWriter.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2019-2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include +#include + +namespace core { + +struct CacheEntryHeader; + +// This class knows how to write a cache entry with a format described in +// CacheEntryHeader. +class CacheEntryWriter : public Writer +{ +public: + CacheEntryWriter(Writer& writer, const CacheEntryHeader& header); + + void write(const void* data, size_t count) override; + using Writer::write; + + // Close for writing. + // + // This method potentially verifies the end state after writing the cache + // entry and throws `core::Error` if any integrity issues are found. + void finalize() override; + +private: + ChecksummingWriter m_checksumming_writer; + std::unique_ptr m_compressor; +}; + +} // namespace core diff --git a/src/core/ChecksummingReader.hpp b/src/core/ChecksummingReader.hpp new file mode 100644 index 000000000..41c569fa2 --- /dev/null +++ b/src/core/ChecksummingReader.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +namespace core { + +class ChecksummingReader : public Reader +{ +public: + ChecksummingReader(core::Reader& reader); + + using core::Reader::read; + size_t read(void* data, size_t count) override; + + void set_reader(core::Reader& reader); + + util::XXH3_128::Digest digest() const; + +private: + core::Reader* m_reader; + util::XXH3_128 m_checksum; +}; + +inline ChecksummingReader::ChecksummingReader(core::Reader& reader) + : m_reader(&reader) +{ +} + +inline size_t +ChecksummingReader::read(void* const data, const size_t count) +{ + const auto bytes_read = m_reader->read(data, count); + m_checksum.update(data, bytes_read); + return bytes_read; +} + +inline void +ChecksummingReader::set_reader(core::Reader& reader) +{ + m_reader = &reader; +} + +inline util::XXH3_128::Digest +ChecksummingReader::digest() const +{ + return m_checksum.digest(); +} + +} // namespace core diff --git a/src/core/ChecksummingWriter.hpp b/src/core/ChecksummingWriter.hpp new file mode 100644 index 000000000..1d6b009f6 --- /dev/null +++ b/src/core/ChecksummingWriter.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +namespace core { + +class ChecksummingWriter : public Writer +{ +public: + ChecksummingWriter(core::Writer& writer); + + using core::Writer::write; + void write(const void* data, size_t count) override; + void finalize() override; + + void set_writer(core::Writer& writer); + + util::XXH3_128::Digest digest() const; + +private: + core::Writer* m_writer; + util::XXH3_128 m_checksum; +}; + +inline ChecksummingWriter::ChecksummingWriter(core::Writer& writer) + : m_writer(&writer) +{ +} + +inline void +ChecksummingWriter::write(const void* const data, const size_t count) +{ + m_writer->write(data, count); + m_checksum.update(data, count); +} + +inline void +ChecksummingWriter::finalize() +{ + m_writer->finalize(); +} + +inline void +ChecksummingWriter::set_writer(core::Writer& writer) +{ + m_writer = &writer; +} + +inline util::XXH3_128::Digest +ChecksummingWriter::digest() const +{ + return m_checksum.digest(); +} + +} // namespace core diff --git a/src/core/FileReader.hpp b/src/core/FileReader.hpp new file mode 100644 index 000000000..c7c7b8ced --- /dev/null +++ b/src/core/FileReader.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list 
of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +#include + +namespace core { + +class FileReader : public Reader +{ +public: + FileReader(FILE* stream); + + size_t read(void* data, size_t size) override; + +private: + FILE* m_stream; +}; + +inline FileReader::FileReader(FILE* stream) : m_stream(stream) +{ +} + +inline size_t +FileReader::read(void* const data, const size_t size) +{ + if (size == 0) { + return 0; + } + const auto bytes_read = fread(data, 1, size, m_stream); + if (bytes_read == 0) { + throw core::Error("Failed to read from file stream"); + } + return bytes_read; +} + +} // namespace core diff --git a/src/core/FileWriter.hpp b/src/core/FileWriter.hpp new file mode 100644 index 000000000..f4cdc0bd4 --- /dev/null +++ b/src/core/FileWriter.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +#include + +namespace core { + +class FileWriter : public Writer +{ +public: + FileWriter(FILE* stream); + + void write(const void* data, size_t size) override; + void finalize() override; + +private: + FILE* m_stream; +}; + +inline FileWriter::FileWriter(FILE* const stream) : m_stream(stream) +{ +} + +inline void +FileWriter::write(const void* const data, const size_t size) +{ + if (size > 0 && fwrite(data, size, 1, m_stream) != 1) { + throw core::Error("Failed to write to stream"); + } +} + +inline void +FileWriter::finalize() +{ + fflush(m_stream); +} + +} // namespace core diff --git a/src/core/Reader.hpp b/src/core/Reader.hpp new file mode 100644 index 000000000..a352b9a54 --- /dev/null +++ b/src/core/Reader.hpp @@ -0,0 +1,79 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +#include +#include +#include + +namespace core { + +class Reader +{ +public: + virtual ~Reader() = default; + + // Read `count` bytes into `data`, returning the actual number of bytes read + // if not enough data is available. Throws `core::Error` on failure, e.g. if + // no bytes could be read. + virtual size_t read(void* data, size_t count) = 0; + + // Read an integer. Throws Error on failure. + template T read_int(); + + // Read an integer into `value`. Throws Error on failure. + template void read_int(T& value); + + // Read a string of length `length`. Throws `core::Error` on failure. + std::string read_str(size_t length); +}; + +template +inline T +Reader::read_int() +{ + uint8_t buffer[sizeof(T)]; + const auto bytes_read = read(buffer, sizeof(T)); + if (bytes_read != sizeof(T)) { + throw core::Error("Read underflow"); + } + T value; + Util::big_endian_to_int(buffer, value); + return value; +} + +template +inline void +Reader::read_int(T& value) +{ + value = read_int(); +} + +inline std::string +Reader::read_str(const size_t length) +{ + std::string value(length, 0); + read(&value[0], length); + return value; +} + +} // namespace core diff --git a/src/core/Writer.hpp b/src/core/Writer.hpp new file mode 100644 index 000000000..7474a5ee6 --- /dev/null +++ b/src/core/Writer.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include + +#include +#include +#include + +namespace core { + +class Writer +{ +public: + virtual ~Writer() = default; + + // Write `count` bytes from `data`. Throws `core::Error` on failure. + virtual void write(const void* data, size_t count) = 0; + + // Write integer `value`. Throws `core::Error` on failure. + template void write_int(T value); + + // Write `value`. Throws `core::Error` on failure. + void write_str(const std::string& value); + + // Finalize writing, e.g. flush written bytes and potentially check for error + // states. Throws `core::Error` on failure. 
+ virtual void finalize() = 0; +}; + +template +inline void +Writer::write_int(const T value) +{ + uint8_t buffer[sizeof(T)]; + Util::int_to_big_endian(value, buffer); + write(buffer, sizeof(T)); +} + +inline void +Writer::write_str(const std::string& value) +{ + write(value.data(), value.length()); +} + +} // namespace core diff --git a/src/core/mainoptions.cpp b/src/core/mainoptions.cpp index 35aa2cc86..7bb3c0bc5 100644 --- a/src/core/mainoptions.cpp +++ b/src/core/mainoptions.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -126,7 +126,7 @@ Options for secondary storage: --trim-dir; default: atime Options for scripting or debugging: - --checksum-file PATH print the checksum (64 bit XXH3) of the file at + --checksum-file PATH print the checksum (128 bit XXH3) of the file at PATH --dump-manifest PATH dump manifest file at PATH in text format --dump-result PATH dump result file at PATH in text format @@ -369,12 +369,13 @@ process_main_options(int argc, const char* const* argv) break; case CHECKSUM_FILE: { - util::XXH3_64 checksum; + util::XXH3_128 checksum; Fd fd(arg == "-" ? 
STDIN_FILENO : open(arg.c_str(), O_RDONLY)); Util::read_fd(*fd, [&checksum](const void* data, size_t size) { checksum.update(data, size); }); - PRINT(stdout, "{:016x}\n", checksum.digest()); + const auto digest = checksum.digest(); + PRINT(stdout, "{}\n", Util::format_base16(digest.bytes(), digest.size())); break; } diff --git a/src/core/types.hpp b/src/core/types.hpp index d4fbf6659..4ee52fa39 100644 --- a/src/core/types.hpp +++ b/src/core/types.hpp @@ -18,8 +18,10 @@ #pragma once +#include + namespace core { -enum class CacheEntryType { result, manifest }; +enum class CacheEntryType : uint8_t { result = 0, manifest = 1 }; } // namespace core diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp index 50f697baf..497bfcb9c 100644 --- a/src/storage/Storage.cpp +++ b/src/storage/Storage.cpp @@ -392,10 +392,10 @@ get_shard_url(const Digest& key, double highest_score = -1.0; std::string best_shard; for (const auto& shard_config : shards) { - util::XXH3_64 checksum; - checksum.update(key.bytes(), key.size()); - checksum.update(shard_config.name.data(), shard_config.name.length()); - const double score = to_half_open_unit_interval(checksum.digest()); + util::XXH3_64 hash; + hash.update(key.bytes(), key.size()); + hash.update(shard_config.name.data(), shard_config.name.length()); + const double score = to_half_open_unit_interval(hash.digest()); ASSERT(score >= 0.0 && score < 1.0); const double weighted_score = score == 0.0 ? 
0.0 : shard_config.weight / -std::log(score); diff --git a/src/storage/primary/PrimaryStorage_compress.cpp b/src/storage/primary/PrimaryStorage_compress.cpp index 892a27b21..6b40567ec 100644 --- a/src/storage/primary/PrimaryStorage_compress.cpp +++ b/src/storage/primary/PrimaryStorage_compress.cpp @@ -19,8 +19,6 @@ #include "PrimaryStorage.hpp" #include -#include -#include #include #include #include @@ -29,6 +27,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -124,41 +126,20 @@ open_file(const std::string& path, const char* const mode) return f; } -static std::unique_ptr -create_reader(const CacheFile& cache_file, FILE* const stream) +static std::unique_ptr +create_reader(const CacheFile& cache_file, core::Reader& reader) { if (cache_file.type() == CacheFile::Type::unknown) { throw core::Error("unknown file type for {}", cache_file.path()); } - switch (cache_file.type()) { - case CacheFile::Type::result: - return std::make_unique( - stream, Result::k_magic, Result::k_version); - - case CacheFile::Type::manifest: - return std::make_unique( - stream, Manifest::k_magic, Manifest::k_version); - - case CacheFile::Type::unknown: - ASSERT(false); // Handled at function entry. 
- } - - ASSERT(false); + return std::make_unique(reader); } -static std::unique_ptr -create_writer(FILE* const stream, - const CacheEntryReader& reader, - const compression::Type compression_type, - const int8_t compression_level) +static std::unique_ptr +create_writer(core::Writer& writer, const core::CacheEntryHeader& header) { - return std::make_unique(stream, - reader.magic(), - reader.version(), - compression_type, - compression_level, - reader.payload_size()); + return std::make_unique(writer, header); } static void @@ -168,17 +149,18 @@ recompress_file(RecompressionStatistics& statistics, const nonstd::optional level) { auto file = open_file(cache_file.path(), "rb"); - auto reader = create_reader(cache_file, file.get()); + core::FileReader file_reader(file.get()); + auto reader = create_reader(cache_file, file_reader); const auto old_stat = Stat::stat(cache_file.path(), Stat::OnError::log); - const uint64_t content_size = reader->content_size(); + const uint64_t content_size = reader->header().entry_size; const int8_t wanted_level = level ? (*level == 0 ? compression::ZstdCompressor::default_compression_level : *level) : 0; - if (reader->compression_level() == wanted_level) { + if (reader->header().compression_level == wanted_level) { statistics.update(content_size, old_stat.size(), old_stat.size(), 0); return; } @@ -187,14 +169,15 @@ recompress_file(RecompressionStatistics& statistics, cache_file.path(), level ? FMT("level {}", wanted_level) : "uncompressed"); AtomicFile atomic_new_file(cache_file.path(), AtomicFile::Mode::binary); - auto writer = - create_writer(atomic_new_file.stream(), - *reader, - level ? compression::Type::zstd : compression::Type::none, - wanted_level); + core::FileWriter file_writer(atomic_new_file.stream()); + auto header = reader->header(); + header.compression_type = + level ? 
compression::Type::zstd : compression::Type::none; + header.compression_level = wanted_level; + auto writer = create_writer(file_writer, header); char buffer[CCACHE_READ_BUFFER_SIZE]; - size_t bytes_left = reader->payload_size(); + size_t bytes_left = reader->header().payload_size(); while (bytes_left > 0) { size_t bytes_to_read = std::min(bytes_left, sizeof(buffer)); reader->read(buffer, bytes_to_read); @@ -237,9 +220,10 @@ PrimaryStorage::get_compression_statistics( try { auto file = open_file(cache_file.path(), "rb"); - auto reader = create_reader(cache_file, file.get()); + core::FileReader file_reader(file.get()); + auto reader = create_reader(cache_file, file_reader); cs.compr_size += cache_file.lstat().size(); - cs.content_size += reader->content_size(); + cs.content_size += reader->header().entry_size; } catch (core::Error&) { cs.incompr_size += cache_file.lstat().size(); } diff --git a/unittest/test_NullCompression.cpp b/unittest/test_NullCompression.cpp index 6f8759698..d733fa9cd 100644 --- a/unittest/test_NullCompression.cpp +++ b/unittest/test_NullCompression.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include "third_party/doctest.h" @@ -38,15 +40,17 @@ TEST_CASE("compression::Type::none roundtrip") TestContext test_context; File f("data.uncompressed", "w"); + core::FileWriter fw(f.get()); auto compressor = - Compressor::create_from_type(compression::Type::none, f.get(), 1); + Compressor::create_from_type(compression::Type::none, fw, 1); CHECK(compressor->actual_compression_level() == 0); compressor->write("foobar", 6); compressor->finalize(); f.open("data.uncompressed", "r"); + core::FileReader fr(f.get()); auto decompressor = - Decompressor::create_from_type(compression::Type::none, f.get()); + Decompressor::create_from_type(compression::Type::none, fr); char buffer[4]; decompressor->read(buffer, 4); @@ -56,7 +60,7 @@ TEST_CASE("compression::Type::none roundtrip") { // Not reached the end. 
CHECK_THROWS_WITH(decompressor->finalize(), - "garbage data at end of uncompressed stream"); + "Garbage data at end of uncompressed stream"); } SUBCASE("Read to end") @@ -69,7 +73,7 @@ TEST_CASE("compression::Type::none roundtrip") // Nothing left to read. CHECK_THROWS_WITH(decompressor->read(buffer, 1), - "failed to read from uncompressed stream"); + "Failed to read from file stream"); } } diff --git a/unittest/test_ZstdCompression.cpp b/unittest/test_ZstdCompression.cpp index fd06dc4e8..59c169f02 100644 --- a/unittest/test_ZstdCompression.cpp +++ b/unittest/test_ZstdCompression.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include "third_party/doctest.h" @@ -38,15 +40,17 @@ TEST_CASE("Small compression::Type::zstd roundtrip") TestContext test_context; File f("data.zstd", "wb"); + core::FileWriter fw(f.get()); auto compressor = - Compressor::create_from_type(compression::Type::zstd, f.get(), 1); + Compressor::create_from_type(compression::Type::zstd, fw, 1); CHECK(compressor->actual_compression_level() == 1); compressor->write("foobar", 6); compressor->finalize(); f.open("data.zstd", "rb"); + core::FileReader fr(f.get()); auto decompressor = - Decompressor::create_from_type(compression::Type::zstd, f.get()); + Decompressor::create_from_type(compression::Type::zstd, fr); char buffer[4]; decompressor->read(buffer, 4); @@ -54,7 +58,7 @@ TEST_CASE("Small compression::Type::zstd roundtrip") // Not reached the end. CHECK_THROWS_WITH(decompressor->finalize(), - "garbage data at end of zstd input stream"); + "Garbage data at end of zstd input stream"); decompressor->read(buffer, 2); CHECK(memcmp(buffer, "ar", 2) == 0); @@ -64,7 +68,7 @@ TEST_CASE("Small compression::Type::zstd roundtrip") // Nothing left to read. 
CHECK_THROWS_WITH(decompressor->read(buffer, 1), - "failed to read from zstd input stream"); + "Failed to read from file stream"); } TEST_CASE("Large compressible compression::Type::zstd roundtrip") @@ -74,16 +78,18 @@ TEST_CASE("Large compressible compression::Type::zstd roundtrip") char data[] = "The quick brown fox jumps over the lazy dog"; File f("data.zstd", "wb"); + core::FileWriter fw(f.get()); auto compressor = - Compressor::create_from_type(compression::Type::zstd, f.get(), 1); + Compressor::create_from_type(compression::Type::zstd, fw, 1); for (size_t i = 0; i < 1000; i++) { compressor->write(data, sizeof(data)); } compressor->finalize(); f.open("data.zstd", "rb"); + core::FileReader fr(f.get()); auto decompressor = - Decompressor::create_from_type(compression::Type::zstd, f.get()); + Decompressor::create_from_type(compression::Type::zstd, fr); char buffer[sizeof(data)]; for (size_t i = 0; i < 1000; i++) { @@ -96,7 +102,7 @@ TEST_CASE("Large compressible compression::Type::zstd roundtrip") // Nothing left to read. CHECK_THROWS_WITH(decompressor->read(buffer, 1), - "failed to read from zstd input stream"); + "Failed to read from file stream"); } TEST_CASE("Large uncompressible compression::Type::zstd roundtrip") @@ -109,14 +115,16 @@ TEST_CASE("Large uncompressible compression::Type::zstd roundtrip") } File f("data.zstd", "wb"); + core::FileWriter fw(f.get()); auto compressor = - Compressor::create_from_type(compression::Type::zstd, f.get(), 1); + Compressor::create_from_type(compression::Type::zstd, fw, 1); compressor->write(data, sizeof(data)); compressor->finalize(); f.open("data.zstd", "rb"); + core::FileReader fr(f.get()); auto decompressor = - Decompressor::create_from_type(compression::Type::zstd, f.get()); + Decompressor::create_from_type(compression::Type::zstd, fr); char buffer[sizeof(data)]; decompressor->read(buffer, sizeof(buffer));