git.ipfire.org Git - thirdparty/ccache.git/commitdiff
feat: Improve cache entry format
author Joel Rosdahl <joel@rosdahl.net>
Thu, 4 Nov 2021 07:20:33 +0000 (08:20 +0100)
committer Joel Rosdahl <joel@rosdahl.net>
Sat, 6 Nov 2021 13:30:58 +0000 (14:30 +0100)
Cache entry header changes:

* Changed magic bytes to represent a generic “ccache entry” instead of a
  specific result/manifest entry.
* Added a payload type field (currently manifest or result) separate
  from the magic bytes. This allows code that only operates on the
  “container” (e.g. recompression) to be ignorant of payload types and
  changes to payload formats.
* Added a creation timestamp field. This can be useful when debugging,
  for instance to match the timestamp with a build start/end.
* Added a field for the ccache version that created the cache entry.
  This can be useful for debugging, and it also makes it possible for
  future ccache versions to refuse using results from previous buggy
  ccache versions.
* Added a tag field to be used by a future feature.

Cache entry epilogue changes:

* Use 128-bit XXH3 instead of 64-bit XXH3 for the checksum since those
  extra bits are computed by the algorithm anyway.

Manifest/result entry changes:

* Added manifest/result format version fields. This means that the
  payload version can be stepped without stepping the outer format
  version, thereby making it possible for older ccache versions to, for
  instance, recompress the cache even though the payload format has
  changed.

Other improvements:

* Added generic Reader and Writer interfaces to decouple code from using
  FILE* directly.
* Refactored checksum handling into ChecksummingReader and
  ChecksummingWriter.

47 files changed:
misc/ccache.magic
src/CMakeLists.txt
src/CacheEntryReader.cpp [deleted file]
src/CacheEntryReader.hpp [deleted file]
src/CacheEntryWriter.cpp [deleted file]
src/CacheEntryWriter.hpp [deleted file]
src/Manifest.cpp
src/Result.cpp
src/Result.hpp
src/ResultDumper.cpp
src/ResultDumper.hpp
src/ResultExtractor.cpp
src/ResultExtractor.hpp
src/ResultRetriever.cpp
src/ResultRetriever.hpp
src/ccache.cpp
src/compression/Compressor.cpp
src/compression/Compressor.hpp
src/compression/Decompressor.cpp
src/compression/Decompressor.hpp
src/compression/NullCompressor.cpp
src/compression/NullCompressor.hpp
src/compression/NullDecompressor.cpp
src/compression/NullDecompressor.hpp
src/compression/ZstdCompressor.cpp
src/compression/ZstdCompressor.hpp
src/compression/ZstdDecompressor.cpp
src/compression/ZstdDecompressor.hpp
src/core/CMakeLists.txt
src/core/CacheEntryHeader.cpp [new file with mode: 0644]
src/core/CacheEntryHeader.hpp [new file with mode: 0644]
src/core/CacheEntryReader.cpp [new file with mode: 0644]
src/core/CacheEntryReader.hpp [new file with mode: 0644]
src/core/CacheEntryWriter.cpp [new file with mode: 0644]
src/core/CacheEntryWriter.hpp [new file with mode: 0644]
src/core/ChecksummingReader.hpp [new file with mode: 0644]
src/core/ChecksummingWriter.hpp [new file with mode: 0644]
src/core/FileReader.hpp [new file with mode: 0644]
src/core/FileWriter.hpp [new file with mode: 0644]
src/core/Reader.hpp [new file with mode: 0644]
src/core/Writer.hpp [new file with mode: 0644]
src/core/mainoptions.cpp
src/core/types.hpp
src/storage/Storage.cpp
src/storage/primary/PrimaryStorage_compress.cpp
unittest/test_NullCompression.cpp
unittest/test_ZstdCompression.cpp

index ea5a143d947fe2094b5e1faec57b75e9d1b142ed..ad08262f772090c05b176dd9bdb522c2d85e1ac0 100644 (file)
@@ -1,7 +1,4 @@
-# ccache manifest
-0       string          cCmF            ccache manifest
->4      ubyte           x               \b, version %d
-
-# ccache result
-0       string          cCrS            ccache result
->4      ubyte           x               \b, version %d
+0       beshort         0xCCAC          ccache entry
+>2      byte            x               \b, format version %d
+>3      byte            0               \b, result
+>3      byte            1               \b, manifest
index e480cf8c57cc8a57f03fddcd715d66f4b61ff5d7..1fb096957d2f9f4e8766a68b53b5428d87b15092 100644 (file)
@@ -2,8 +2,6 @@ set(
   source_files
   Args.cpp
   AtomicFile.cpp
-  CacheEntryReader.cpp
-  CacheEntryWriter.cpp
   Config.cpp
   Context.cpp
   Depfile.cpp
diff --git a/src/CacheEntryReader.cpp b/src/CacheEntryReader.cpp
deleted file mode 100644 (file)
index 79271c7..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
-//
-// See doc/AUTHORS.adoc for a complete list of contributors.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3 of the License, or (at your option)
-// any later version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program; if not, write to the Free Software Foundation, Inc., 51
-// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-#include "CacheEntryReader.hpp"
-
-#include "fmtmacros.hpp"
-
-#include <compression/Compressor.hpp>
-#include <core/exceptions.hpp>
-
-#include "third_party/fmt/core.h"
-
-CacheEntryReader::CacheEntryReader(FILE* stream,
-                                   const uint8_t* expected_magic,
-                                   uint8_t expected_version)
-{
-  uint8_t header_bytes[15];
-  if (fread(header_bytes, sizeof(header_bytes), 1, stream) != 1) {
-    throw core::Error("Error reading header");
-  }
-
-  memcpy(m_magic, header_bytes, sizeof(m_magic));
-  m_version = header_bytes[4];
-  m_compression_type = compression::type_from_int(header_bytes[5]);
-  m_compression_level = header_bytes[6];
-  Util::big_endian_to_int(header_bytes + 7, m_content_size);
-
-  if (memcmp(m_magic, expected_magic, sizeof(m_magic)) != 0) {
-    throw core::Error("Bad magic value 0x{:02x}{:02x}{:02x}{:02x}",
-                      m_magic[0],
-                      m_magic[1],
-                      m_magic[2],
-                      m_magic[3]);
-  }
-  if (m_version != expected_version) {
-    throw core::Error(
-      "Unknown version (actual {}, expected {})", m_version, expected_version);
-  }
-
-  m_checksum.update(header_bytes, sizeof(header_bytes));
-  m_decompressor =
-    compression::Decompressor::create_from_type(m_compression_type, stream);
-}
-
-void
-CacheEntryReader::dump_header(FILE* dump_stream)
-{
-  PRINT(dump_stream, "Magic: {:.4}\n", m_magic);
-  PRINT(dump_stream, "Version: {}\n", m_version);
-  PRINT(dump_stream,
-        "Compression type: {}\n",
-        compression::type_to_string(m_compression_type));
-  PRINT(dump_stream, "Compression level: {}\n", m_compression_level);
-  PRINT(dump_stream, "Content size: {}\n", m_content_size);
-}
-
-void
-CacheEntryReader::read(void* data, size_t count)
-{
-  m_decompressor->read(data, count);
-  m_checksum.update(data, count);
-}
-
-void
-CacheEntryReader::finalize()
-{
-  uint64_t actual_digest = m_checksum.digest();
-
-  uint8_t buffer[8];
-  read(buffer, sizeof(buffer));
-  uint64_t expected_digest;
-  Util::big_endian_to_int(buffer, expected_digest);
-
-  if (actual_digest != expected_digest) {
-    throw core::Error(
-      "Incorrect checksum (actual 0x{:016x}, expected 0x{:016x})",
-      actual_digest,
-      expected_digest);
-  }
-
-  m_decompressor->finalize();
-}
diff --git a/src/CacheEntryReader.hpp b/src/CacheEntryReader.hpp
deleted file mode 100644 (file)
index 75806b1..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
-//
-// See doc/AUTHORS.adoc for a complete list of contributors.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3 of the License, or (at your option)
-// any later version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program; if not, write to the Free Software Foundation, Inc., 51
-// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-#pragma once
-
-#include "Util.hpp"
-
-#include <compression/Decompressor.hpp>
-#include <util/XXH3_64.hpp>
-
-#include <cstdint>
-#include <cstdio>
-#include <memory>
-
-// This class knows how to read a cache entry with a common header and a
-// payload part that is different depending on the cache entry type (result or
-// manifest).
-class CacheEntryReader
-{
-public:
-  // Constructor.
-  //
-  // Parameters:
-  // - stream: Stream to read header and payload from.
-  // - expected_magic: Expected file format magic (first four bytes of the
-  //   file).
-  // - expected_version: Expected file format version.
-  CacheEntryReader(FILE* stream,
-                   const uint8_t* expected_magic,
-                   uint8_t expected_version);
-
-  // Dump header information in text format.
-  //
-  // Parameters:
-  // - dump_stream: Stream to write to.
-  void dump_header(FILE* dump_stream);
-
-  // Read data into a buffer from the payload.
-  //
-  // Parameters:
-  // - data: Buffer to write data to.
-  // - count: How many bytes to write.
-  //
-  // Throws Error on failure.
-  void read(void* data, size_t count);
-
-  // Read an unsigned integer from the payload.
-  //
-  // Parameters:
-  // - value: Variable to write to.
-  //
-  // Throws Error on failure.
-  template<typename T> void read(T& value);
-
-  // Close for reading.
-  //
-  // This method potentially verifies the end state after reading the cache
-  // entry and throws Error if any integrity issues are found.
-  void finalize();
-
-  // Get size of the payload,
-  uint64_t payload_size() const;
-
-  // Get content magic.
-  const uint8_t* magic() const;
-
-  // Get content version.
-  uint8_t version() const;
-
-  // Get compression type.
-  compression::Type compression_type() const;
-
-  // Get compression level.
-  int8_t compression_level() const;
-
-  // Get size of the content (header + payload + checksum).
-  uint64_t content_size() const;
-
-private:
-  std::unique_ptr<compression::Decompressor> m_decompressor;
-  util::XXH3_64 m_checksum;
-  uint8_t m_magic[4];
-  uint8_t m_version;
-  compression::Type m_compression_type;
-  int8_t m_compression_level;
-  uint64_t m_content_size;
-};
-
-template<typename T>
-inline void
-CacheEntryReader::read(T& value)
-{
-  uint8_t buffer[sizeof(T)];
-  read(buffer, sizeof(T));
-  Util::big_endian_to_int(buffer, value);
-}
-
-inline const uint8_t*
-CacheEntryReader::magic() const
-{
-  return m_magic;
-}
-
-inline uint8_t
-CacheEntryReader::version() const
-{
-  return m_version;
-}
-
-inline compression::Type
-CacheEntryReader::compression_type() const
-{
-  return m_compression_type;
-}
-
-inline int8_t
-CacheEntryReader::compression_level() const
-{
-  return m_compression_level;
-}
-
-inline uint64_t
-CacheEntryReader::payload_size() const
-{
-  return m_content_size - 15 - 8;
-}
-
-inline uint64_t
-CacheEntryReader::content_size() const
-{
-  return m_content_size;
-}
diff --git a/src/CacheEntryWriter.cpp b/src/CacheEntryWriter.cpp
deleted file mode 100644 (file)
index e043120..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
-//
-// See doc/AUTHORS.adoc for a complete list of contributors.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3 of the License, or (at your option)
-// any later version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program; if not, write to the Free Software Foundation, Inc., 51
-// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-#include "CacheEntryWriter.hpp"
-
-#include <core/exceptions.hpp>
-
-CacheEntryWriter::CacheEntryWriter(FILE* stream,
-                                   const uint8_t* magic,
-                                   uint8_t version,
-                                   compression::Type compression_type,
-                                   int8_t compression_level,
-                                   uint64_t payload_size)
-  // clang-format off
-  : m_compressor(compression::Compressor::create_from_type(
-                   compression_type, stream, compression_level))
-// clang-format on
-{
-  uint8_t header_bytes[15];
-  memcpy(header_bytes, magic, 4);
-  header_bytes[4] = version;
-  header_bytes[5] = static_cast<uint8_t>(compression_type);
-  header_bytes[6] = m_compressor->actual_compression_level();
-  uint64_t content_size = 15 + payload_size + 8;
-  Util::int_to_big_endian(content_size, header_bytes + 7);
-  if (fwrite(header_bytes, sizeof(header_bytes), 1, stream) != 1) {
-    throw core::Error("Failed to write cache entry header");
-  }
-  m_checksum.update(header_bytes, sizeof(header_bytes));
-}
-
-void
-CacheEntryWriter::write(const void* data, size_t count)
-{
-  m_compressor->write(data, count);
-  m_checksum.update(data, count);
-}
-
-void
-CacheEntryWriter::finalize()
-{
-  uint8_t buffer[8];
-  Util::int_to_big_endian(m_checksum.digest(), buffer);
-  m_compressor->write(buffer, sizeof(buffer));
-  m_compressor->finalize();
-}
diff --git a/src/CacheEntryWriter.hpp b/src/CacheEntryWriter.hpp
deleted file mode 100644 (file)
index 2f3ae82..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
-//
-// See doc/AUTHORS.adoc for a complete list of contributors.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3 of the License, or (at your option)
-// any later version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program; if not, write to the Free Software Foundation, Inc., 51
-// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-#pragma once
-
-#include "Util.hpp"
-
-#include <compression/Compressor.hpp>
-#include <util/XXH3_64.hpp>
-
-#include <cstdint>
-#include <cstdio>
-#include <memory>
-
-// This class knows how to write a cache entry with a common header and a
-// payload part that is different depending on the cache entry type (result or
-// manifest).
-class CacheEntryWriter
-{
-public:
-  // Constructor.
-  //
-  // Parameters:
-  // - stream: Stream to write header + payload to.
-  // - magic: File format magic (first four bytes of the file).
-  // - version: File format version.
-  // - compression_type: Compression type to use.
-  // - compression_level: Compression level to use.
-  // - payload_size: Payload size.
-  CacheEntryWriter(FILE* stream,
-                   const uint8_t* magic,
-                   uint8_t version,
-                   compression::Type compression_type,
-                   int8_t compression_level,
-                   uint64_t payload_size);
-
-  // Write data to the payload from a buffer.
-  //
-  // Parameters:
-  // - data: Data to write.
-  // - count: Size of data to write.
-  //
-  // Throws Error on failure.
-  void write(const void* data, size_t count);
-
-  // Write an unsigned integer to the payload.
-  //
-  // Parameters:
-  // - value: Value to write.
-  //
-  // Throws Error on failure.
-  template<typename T> void write(T value);
-
-  // Close for writing.
-  //
-  // This method potentially verifies the end state after writing the cache
-  // entry and throws Error if any integrity issues are found.
-  void finalize();
-
-private:
-  std::unique_ptr<compression::Compressor> m_compressor;
-  util::XXH3_64 m_checksum;
-};
-
-template<typename T>
-inline void
-CacheEntryWriter::write(T value)
-{
-  uint8_t buffer[sizeof(T)];
-  Util::int_to_big_endian(value, buffer);
-  write(buffer, sizeof(T));
-}
index a9ab17b1866e7bbea7871094460feb926f2ceb48..09d7683a0b7721759abc813e95dac838d96d22be 100644 (file)
@@ -19,8 +19,6 @@
 #include "Manifest.hpp"
 
 #include "AtomicFile.hpp"
-#include "CacheEntryReader.hpp"
-#include "CacheEntryWriter.hpp"
 #include "Config.hpp"
 #include "Context.hpp"
 #include "Digest.hpp"
 #include "fmtmacros.hpp"
 #include "hashutil.hpp"
 
+#include <ccache.hpp>
+#include <core/CacheEntryReader.hpp>
+#include <core/CacheEntryWriter.hpp>
+#include <core/FileReader.hpp>
+#include <core/FileWriter.hpp>
 #include <core/exceptions.hpp>
 #include <util/XXH3_64.hpp>
 
 //
 // Integers are big-endian.
 //
-// <manifest>      ::= <header> <body> <epilogue
-// <header>        ::= <magic> <version> <compr_type> <compr_level>
-//                     <content_len>
-// <magic>         ::= 4 bytes ("cCrS")
-// <version>       ::= uint8_t
-// <compr_type>    ::= <compr_none> | <compr_zstd>
-// <compr_none>    ::= 0 (uint8_t)
-// <compr_zstd>    ::= 1 (uint8_t)
-// <compr_level>   ::= int8_t
-// <content_len>   ::= uint64_t ; size of file if stored uncompressed
-// <body>          ::= <paths> <includes> <results> ; body is potentially
-//                                                  ; compressed
+// <payload>       ::= <format_ver> <paths> <includes> <results>
+// <format_ver>    ::= uint8_t
 // <paths>         ::= <n_paths> <path_entry>*
 // <n_paths>       ::= uint32_t
 // <path_entry>    ::= <path_len> <path>
 // <ctime>         ::= int64_t ; status change time
 // <results>       ::= <n_results> <result>*
 // <n_results>     ::= uint32_t
-// <result>        ::= <n_indexes> <include_index>* <name>
+// <result>        ::= <n_indexes> <include_index>* <result_key>
 // <n_indexes>     ::= uint32_t
 // <include_index> ::= uint32_t
-// <name>          ::= Digest::size() bytes
-// <epilogue>      ::= <checksum>
-// <checksum>      ::= uint64_t ; XXH3 of content bytes
-//
-// Sketch of concrete layout:
-
-// <magic>         4 bytes
-// <version>       1 byte
-// <compr_type>    1 byte
-// <compr_level>   1 byte
-// <content_len>   8 bytes
-// --- [potentially compressed from here] -------------------------------------
-// <n_paths>       4 bytes
-// <path_len>      2 bytes
-// <path>          path_len bytes
-// ...
-// ----------------------------------------------------------------------------
-// <n_includes>    4 bytes
-// <path_index>    4 bytes
-// <digest>        Digest::size() bytes
-// <fsize>         8 bytes
-// <mtime>         8 bytes
-// <ctime>         8 bytes
-// ...
-// ----------------------------------------------------------------------------
-// <n_results>     4 bytes
-// <n_indexes>     4 bytes
-// <include_index> 4 bytes
-// ...
-// <name>          Digest::size() bytes
-// ...
-// checksum        8 bytes
-//
-//
-// Version history
-// ===============
-//
-// 1: Introduced in ccache 3.0. (Files are always compressed with gzip.)
-// 2: Introduced in ccache 4.0.
+// <result_key>    ::= Digest::size() bytes
 
 using nonstd::nullopt;
 using nonstd::optional;
 
+const uint8_t k_manifest_format_version = 0;
 const uint32_t k_max_manifest_entries = 100;
 const uint32_t k_max_manifest_file_info_entries = 10000;
 
@@ -150,9 +106,9 @@ template<> struct hash<FileInfo>
   operator()(const FileInfo& file_info) const
   {
     static_assert(sizeof(FileInfo) == 48, "unexpected size"); // No padding.
-    util::XXH3_64 checksum;
-    checksum.update(&file_info, sizeof(file_info));
-    return checksum.digest();
+    util::XXH3_64 hash;
+    hash.update(&file_info, sizeof(file_info));
+    return hash.digest();
   }
 };
 
@@ -307,49 +263,49 @@ read_manifest(const std::string& path, FILE* dump_stream = nullptr)
     file_stream = file.get();
   }
 
-  CacheEntryReader reader(file_stream, Manifest::k_magic, Manifest::k_version);
+  core::FileReader file_reader(file_stream);
+  core::CacheEntryReader reader(file_reader);
 
   if (dump_stream) {
-    reader.dump_header(dump_stream);
+    reader.header().dump(dump_stream);
   }
 
-  auto mf = std::make_unique<ManifestData>();
+  const auto format_ver = reader.read_int<uint8_t>();
+  if (format_ver != k_manifest_format_version) {
+    throw core::Error("Unknown manifest format version: {}", format_ver);
+  }
 
-  uint32_t entry_count;
-  reader.read(entry_count);
-  for (uint32_t i = 0; i < entry_count; ++i) {
-    mf->files.emplace_back();
-    auto& entry = mf->files.back();
+  if (dump_stream) {
+    PRINT(dump_stream, "Manifest format version: {}\n", format_ver);
+  }
+
+  auto mf = std::make_unique<ManifestData>();
 
-    uint16_t length;
-    reader.read(length);
-    entry.assign(length, 0);
-    reader.read(&entry[0], length);
+  const auto file_count = reader.read_int<uint32_t>();
+  for (uint32_t i = 0; i < file_count; ++i) {
+    mf->files.push_back(reader.read_str(reader.read_int<uint16_t>()));
   }
 
-  reader.read(entry_count);
-  for (uint32_t i = 0; i < entry_count; ++i) {
+  const auto file_info_count = reader.read_int<uint32_t>();
+  for (uint32_t i = 0; i < file_info_count; ++i) {
     mf->file_infos.emplace_back();
     auto& entry = mf->file_infos.back();
 
-    reader.read(entry.index);
+    reader.read_int(entry.index);
     reader.read(entry.digest.bytes(), Digest::size());
-    reader.read(entry.fsize);
-    reader.read(entry.mtime);
-    reader.read(entry.ctime);
+    reader.read_int(entry.fsize);
+    reader.read_int(entry.mtime);
+    reader.read_int(entry.ctime);
   }
 
-  reader.read(entry_count);
-  for (uint32_t i = 0; i < entry_count; ++i) {
+  const auto result_count = reader.read_int<uint32_t>();
+  for (uint32_t i = 0; i < result_count; ++i) {
     mf->results.emplace_back();
     auto& entry = mf->results.back();
 
-    uint32_t file_info_count;
-    reader.read(file_info_count);
-    for (uint32_t j = 0; j < file_info_count; ++j) {
-      uint32_t file_info_index;
-      reader.read(file_info_index);
-      entry.file_info_indexes.push_back(file_info_index);
+    const auto file_info_index_count = reader.read_int<uint32_t>();
+    for (uint32_t j = 0; j < file_info_index_count; ++j) {
+      entry.file_info_indexes.push_back(reader.read_int<uint32_t>());
     }
     reader.read(entry.key.bytes(), Digest::size());
   }
@@ -364,6 +320,7 @@ write_manifest(const Config& config,
                const ManifestData& mf)
 {
   uint64_t payload_size = 0;
+  payload_size += 1; // format_ver
   payload_size += 4; // n_files
   for (const auto& file : mf.files) {
     payload_size += 2 + file.length();
@@ -378,32 +335,37 @@ write_manifest(const Config& config,
   }
 
   AtomicFile atomic_manifest_file(path, AtomicFile::Mode::binary);
-  CacheEntryWriter writer(atomic_manifest_file.stream(),
-                          Manifest::k_magic,
-                          Manifest::k_version,
-                          compression::type_from_config(config),
-                          compression::level_from_config(config),
-                          payload_size);
-  writer.write<uint32_t>(mf.files.size());
+  core::FileWriter file_writer(atomic_manifest_file.stream());
+  core::CacheEntryHeader header(core::CacheEntryType::manifest,
+                                compression::type_from_config(config),
+                                compression::level_from_config(config),
+                                time(nullptr),
+                                CCACHE_VERSION,
+                                "");
+  header.set_entry_size_from_payload_size(payload_size);
+
+  core::CacheEntryWriter writer(file_writer, header);
+  writer.write_int(k_manifest_format_version);
+  writer.write_int<uint32_t>(mf.files.size());
   for (const auto& file : mf.files) {
-    writer.write<uint16_t>(file.length());
-    writer.write(file.data(), file.length());
+    writer.write_int<uint16_t>(file.length());
+    writer.write_str(file);
   }
 
-  writer.write<uint32_t>(mf.file_infos.size());
+  writer.write_int<uint32_t>(mf.file_infos.size());
   for (const auto& file_info : mf.file_infos) {
-    writer.write<uint32_t>(file_info.index);
+    writer.write_int<uint32_t>(file_info.index);
     writer.write(file_info.digest.bytes(), Digest::size());
-    writer.write(file_info.fsize);
-    writer.write(file_info.mtime);
-    writer.write(file_info.ctime);
+    writer.write_int(file_info.fsize);
+    writer.write_int(file_info.mtime);
+    writer.write_int(file_info.ctime);
   }
 
-  writer.write<uint32_t>(mf.results.size());
+  writer.write_int<uint32_t>(mf.results.size());
   for (const auto& result : mf.results) {
-    writer.write<uint32_t>(result.file_info_indexes.size());
+    writer.write_int<uint32_t>(result.file_info_indexes.size());
     for (auto index : result.file_info_indexes) {
-      writer.write(index);
+      writer.write_int(index);
     }
     writer.write(result.key.bytes(), Digest::size());
   }
index 202ce820cbf7119d4229a171fc788b21d72d4e9b..e79334f347f624b380a99041392cd4599fa0773e 100644 (file)
@@ -19,8 +19,6 @@
 #include "Result.hpp"
 
 #include "AtomicFile.hpp"
-#include "CacheEntryReader.hpp"
-#include "CacheEntryWriter.hpp"
 #include "Config.hpp"
 #include "Context.hpp"
 #include "Fd.hpp"
 #include "Util.hpp"
 #include "fmtmacros.hpp"
 
+#include <ccache.hpp>
+#include <core/CacheEntryReader.hpp>
+#include <core/CacheEntryWriter.hpp>
+#include <core/FileReader.hpp>
+#include <core/FileWriter.hpp>
 #include <core/Statistic.hpp>
 #include <core/exceptions.hpp>
 #include <core/wincompat.hpp>
 //
 // Integers are big-endian.
 //
-// <result>               ::= <header> <body> <epilogue>
-// <header>               ::= <magic> <version> <compr_type> <compr_level>
-//                            <content_len>
-// <magic>                ::= 4 bytes ("cCrS")
-// <version>              ::= uint8_t
-// <compr_type>           ::= <compr_none> | <compr_zstd>
-// <compr_none>           ::= 0 (uint8_t)
-// <compr_zstd>           ::= 1 (uint8_t)
-// <compr_level>          ::= int8_t
-// <content_len>          ::= uint64_t ; size of file if stored uncompressed
-// <body>                 ::= <n_entries> <entry>* ; potentially compressed
+// <payload>              ::= <format_ver> <n_entries> <entry>*
+// <format_ver>           ::= uint8_t
 // <n_entries>            ::= uint8_t
 // <entry>                ::= <embedded_file_entry> | <raw_file_entry>
 // <embedded_file_entry>  ::= <embedded_file_marker> <suffix_len> <suffix>
 // <file_len>             ::= uint64_t
 // <epilogue>             ::= <checksum>
 // <checksum>             ::= uint64_t ; XXH3 of content bytes
-//
-// Sketch of concrete layout:
-//
-// <magic>                4 bytes
-// <version>              1 byte
-// <compr_type>           1 byte
-// <compr_level>          1 byte
-// <content_len>          8 bytes
-// --- [potentially compressed from here] -------------------------------------
-// <n_entries>            1 byte
-// <embedded_file_marker> 1 byte
-// <embedded_file_type>   1 byte
-// <data_len>             8 bytes
-// <data>                 data_len bytes
-// ...
-// <ref_marker>           1 byte
-// <key_len>              1 byte
-// <key>                  key_len bytes
-// ...
-// checksum               8 bytes
-//
-//
-// Version history
-// ===============
-//
-// 1: Introduced in ccache 4.0.
 
 using nonstd::nullopt;
 using nonstd::optional;
@@ -107,6 +75,8 @@ using nonstd::string_view;
 
 namespace {
 
+const uint8_t k_result_format_version = 0;
+
 // File data stored inside the result file.
 const uint8_t k_embedded_file_marker = 0;
 
@@ -252,12 +222,18 @@ Reader::read_result(Consumer& consumer)
     file_stream = file.get();
   }
 
-  CacheEntryReader cache_entry_reader(file_stream, k_magic, k_version);
+  core::FileReader file_reader(file_stream);
+  core::CacheEntryReader cache_entry_reader(file_reader);
+
+  const auto result_format_version = cache_entry_reader.read_int<uint8_t>();
+  if (result_format_version != k_result_format_version) {
+    throw core::Error("Unknown result format version: {}",
+                      result_format_version);
+  }
 
-  consumer.on_header(cache_entry_reader);
+  consumer.on_header(cache_entry_reader, result_format_version);
 
-  uint8_t n_entries;
-  cache_entry_reader.read(n_entries);
+  const auto n_entries = cache_entry_reader.read_int<uint8_t>();
 
   uint32_t i;
   for (i = 0; i < n_entries; ++i) {
@@ -273,12 +249,11 @@ Reader::read_result(Consumer& consumer)
 }
 
 void
-Reader::read_entry(CacheEntryReader& cache_entry_reader,
+Reader::read_entry(core::CacheEntryReader& cache_entry_reader,
                    uint32_t entry_number,
                    Reader::Consumer& consumer)
 {
-  uint8_t marker;
-  cache_entry_reader.read(marker);
+  const auto marker = cache_entry_reader.read_int<uint8_t>();
 
   switch (marker) {
   case k_embedded_file_marker:
@@ -289,12 +264,9 @@ Reader::read_entry(CacheEntryReader& cache_entry_reader,
     throw core::Error("Unknown entry type: {}", marker);
   }
 
-  UnderlyingFileTypeInt type;
-  cache_entry_reader.read(type);
-  FileType file_type = FileType(type);
-
-  uint64_t file_len;
-  cache_entry_reader.read(file_len);
+  const auto type = cache_entry_reader.read_int<UnderlyingFileTypeInt>();
+  const auto file_type = FileType(type);
+  const auto file_len = cache_entry_reader.read_int<uint64_t>();
 
   if (marker == k_embedded_file_marker) {
     consumer.on_entry_start(entry_number, file_type, file_len, nullopt);
@@ -365,14 +337,19 @@ Writer::do_finalize()
   }
 
   AtomicFile atomic_result_file(m_result_path, AtomicFile::Mode::binary);
-  CacheEntryWriter writer(atomic_result_file.stream(),
-                          k_magic,
-                          k_version,
-                          compression::type_from_config(m_ctx.config),
-                          compression::level_from_config(m_ctx.config),
-                          payload_size);
+  core::CacheEntryHeader header(core::CacheEntryType::result,
+                                compression::type_from_config(m_ctx.config),
+                                compression::level_from_config(m_ctx.config),
+                                time(nullptr),
+                                CCACHE_VERSION,
+                                "");
+  header.set_entry_size_from_payload_size(payload_size);
+
+  core::FileWriter file_writer(atomic_result_file.stream());
+  core::CacheEntryWriter writer(file_writer, header);
 
-  writer.write<uint8_t>(m_entries_to_write.size());
+  writer.write_int(k_result_format_version);
+  writer.write_int<uint8_t>(m_entries_to_write.size());
 
   uint32_t entry_number = 0;
   for (const auto& pair : m_entries_to_write) {
@@ -390,10 +367,10 @@ Writer::do_finalize()
         file_size,
         path);
 
-    writer.write<uint8_t>(store_raw ? k_raw_file_marker
-                                    : k_embedded_file_marker);
-    writer.write(UnderlyingFileTypeInt(file_type));
-    writer.write(file_size);
+    writer.write_int<uint8_t>(store_raw ? k_raw_file_marker
+                                        : k_embedded_file_marker);
+    writer.write_int(UnderlyingFileTypeInt(file_type));
+    writer.write_int(file_size);
 
     if (store_raw) {
       file_size_and_count_diff += write_raw_file_entry(path, entry_number);
@@ -411,7 +388,7 @@ Writer::do_finalize()
 }
 
 void
-Result::Writer::write_embedded_file_entry(CacheEntryWriter& writer,
+Result::Writer::write_embedded_file_entry(core::CacheEntryWriter& writer,
                                           const std::string& path,
                                           uint64_t file_size)
 {
index 994322dd4cd709486ac88136a1b00f5f39fe6b9d..f36cbb6cd09ec7a1f75e6385523049a2f571c53c 100644 (file)
 #include <string>
 #include <vector>
 
+namespace core {
+
 class CacheEntryReader;
 class CacheEntryWriter;
+
+} // namespace core
+
 class Context;
 
 namespace Result {
@@ -98,7 +103,8 @@ public:
   public:
     virtual ~Consumer() = default;
 
-    virtual void on_header(CacheEntryReader& cache_entry_reader) = 0;
+    virtual void on_header(core::CacheEntryReader& cache_entry_reader,
+                           uint8_t result_format_version) = 0;
     virtual void on_entry_start(uint32_t entry_number,
                                 FileType file_type,
                                 uint64_t file_len,
@@ -114,7 +120,7 @@ private:
   const std::string m_result_path;
 
   bool read_result(Consumer& consumer);
-  void read_entry(CacheEntryReader& cache_entry_reader,
+  void read_entry(core::CacheEntryReader& cache_entry_reader,
                   uint32_t entry_number,
                   Reader::Consumer& consumer);
 };
@@ -137,7 +143,7 @@ private:
   std::vector<std::pair<FileType, std::string>> m_entries_to_write;
 
   FileSizeAndCountDiff do_finalize();
-  static void write_embedded_file_entry(CacheEntryWriter& writer,
+  static void write_embedded_file_entry(core::CacheEntryWriter& writer,
                                         const std::string& path,
                                         uint64_t file_size);
   FileSizeAndCountDiff write_raw_file_entry(const std::string& path,
index 1e3bac40cd8f3ca788406ba2fcdc50404f06c274..220ca2228b140750901ca5590a384686fcb60eb0 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
 //
 // See doc/AUTHORS.adoc for a complete list of contributors.
 //
 
 #include "ResultDumper.hpp"
 
-#include "CacheEntryReader.hpp"
 #include "Context.hpp"
 #include "Logging.hpp"
 #include "fmtmacros.hpp"
 
+#include <core/CacheEntryReader.hpp>
+
 using nonstd::optional;
 
 ResultDumper::ResultDumper(FILE* stream) : m_stream(stream)
@@ -30,9 +31,11 @@ ResultDumper::ResultDumper(FILE* stream) : m_stream(stream)
 }
 
 void
-ResultDumper::on_header(CacheEntryReader& cache_entry_reader)
+ResultDumper::on_header(core::CacheEntryReader& cache_entry_reader,
+                        const uint8_t result_format_version)
 {
-  cache_entry_reader.dump_header(m_stream);
+  cache_entry_reader.header().dump(m_stream);
+  PRINT(m_stream, "Result format version: {}\n", result_format_version);
 }
 
 void
index c48d2ab59c048da404c981f85501f91e9aa8ed72..0274efaf4afb67d9cbec884e9969f4e3f5399140 100644 (file)
@@ -29,7 +29,8 @@ class ResultDumper : public Result::Reader::Consumer
 public:
   ResultDumper(FILE* stream);
 
-  void on_header(CacheEntryReader& cache_entry_reader) override;
+  void on_header(core::CacheEntryReader& cache_entry_reader,
+                 uint8_t result_format_version) override;
   void on_entry_start(uint32_t entry_number,
                       Result::FileType file_type,
                       uint64_t file_len,
index 077f324426f084152e0bb1dfce69d9747b67478d..5dcef476e7b18260a1e772811761925a867c4a18 100644 (file)
@@ -34,7 +34,8 @@ ResultExtractor::ResultExtractor(const std::string& directory)
 }
 
 void
-ResultExtractor::on_header(CacheEntryReader& /*cache_entry_reader*/)
+ResultExtractor::on_header(core::CacheEntryReader& /*cache_entry_reader*/,
+                           const uint8_t /*result_format_version*/)
 {
 }
 
index 0e801ff4a0e87aefcee4bc6c2f0a8a78e53976af..4ee2a3a9ba8a3cf6d175fc6089758d35fc86263e 100644 (file)
@@ -29,7 +29,8 @@ class ResultExtractor : public Result::Reader::Consumer
 public:
   ResultExtractor(const std::string& directory);
 
-  void on_header(CacheEntryReader& cache_entry_reader) override;
+  void on_header(core::CacheEntryReader& cache_entry_reader,
+                 uint8_t result_format_version) override;
   void on_entry_start(uint32_t entry_number,
                       Result::FileType file_type,
                       uint64_t file_len,
index 06dab86866dfa7c888f8b6ce6a67b4b72b4262e4..2e0bf4740aa1e3802a7369eeb879a4401705512a 100644 (file)
@@ -42,7 +42,8 @@ ResultRetriever::ResultRetriever(Context& ctx, bool rewrite_dependency_target)
 }
 
 void
-ResultRetriever::on_header(CacheEntryReader& /*cache_entry_reader*/)
+ResultRetriever::on_header(core::CacheEntryReader& /*cache_entry_reader*/,
+                           const uint8_t /*result_format_version*/)
 {
 }
 
index 4794afdef27364f67b6bdf280f7ae4bcf37adfab..3924965ab891b53d2fd98ca662b4f28defd2e693 100644 (file)
@@ -29,7 +29,8 @@ class ResultRetriever : public Result::Reader::Consumer
 public:
   ResultRetriever(Context& ctx, bool rewrite_dependency_target);
 
-  void on_header(CacheEntryReader& cache_entry_reader) override;
+  void on_header(core::CacheEntryReader& cache_entry_reader,
+                 uint8_t result_format_version) override;
   void on_entry_start(uint32_t entry_number,
                       Result::FileType file_type,
                       uint64_t file_len,
index f23f50ef766a1edeacaa5094df7c9b69bc7ac5eb..2d8197f7baf0934316936155b674515ac28c3226 100644 (file)
@@ -88,7 +88,7 @@ using nonstd::string_view;
 // different for the same input in a new ccache version, we can just change
 // this string. A typical example would be if the format of one of the files
 // stored in the cache changes in a backwards-incompatible way.
-const char HASH_PREFIX[] = "3";
+const char HASH_PREFIX[] = "4";
 
 namespace {
 
index cd695beea5c86cf3c7e8178b592cba0f368c17a4..dbcc665cfdc7852a26d365f25200617cd088fea7 100644 (file)
 #include "ZstdCompressor.hpp"
 #include "assertions.hpp"
 
+#include <core/Writer.hpp>
+
 #include <memory>
 
 namespace compression {
 
 std::unique_ptr<Compressor>
 Compressor::create_from_type(const Type type,
-                             FILE* const stream,
+                             core::Writer& writer,
                              const int8_t compression_level)
 {
   switch (type) {
   case compression::Type::none:
-    return std::make_unique<NullCompressor>(stream);
+    return std::make_unique<NullCompressor>(writer);
 
   case compression::Type::zstd:
-    return std::make_unique<ZstdCompressor>(stream, compression_level);
+    return std::make_unique<ZstdCompressor>(writer, compression_level);
   }
 
   ASSERT(false);
index 9c8e56393a31a56bd4521395955bff0dfa984757..deebbd997ce7dc03ce3e694a711bf1a65dd611a5 100644 (file)
 #pragma once
 
 #include <compression/types.hpp>
+#include <core/Writer.hpp>
 
 #include <cstdint>
-#include <cstdio>
 #include <memory>
 
+namespace core {
+
+class Writer;
+
+}
+
 namespace compression {
 
-class Compressor
+class Compressor : public core::Writer
 {
 public:
   virtual ~Compressor() = default;
 
-  // Create a compressor for the specified type.
-  //
-  // Parameters:
-  // - type: The type.
-  // - stream: The stream to write to.
-  // - compression_level: Desired compression level.
   static std::unique_ptr<Compressor>
-  create_from_type(Type type, FILE* stream, int8_t compression_level);
+  create_from_type(Type type, core::Writer& writer, int8_t compression_level);
 
-  // Get the actual compression level used for the compressed stream.
   virtual int8_t actual_compression_level() const = 0;
-
-  // Write data from a buffer to the compressed stream.
-  //
-  // Parameters:
-  // - data: Data to write.
-  // - count: Size of data to write.
-  //
-  // Throws Error on failure.
-  virtual void write(const void* data, size_t count) = 0;
-
-  // Write an unsigned integer to the compressed stream.
-  //
-  // Parameters:
-  // - value: Value to write.
-  //
-  // Throws Error on failure.
-  template<typename T> void write(T value);
-
-  // Finalize compression.
-  //
-  // This method checks that the end state of the compressed stream is correct
-  // and throws Error if not.
-  virtual void finalize() = 0;
 };
 
 } // namespace compression
index 3a76b1643e4791638f060300170c1245138060b5..6bfa713df5db89773fdba36de906e28e8d7dd7fa 100644 (file)
 namespace compression {
 
 std::unique_ptr<Decompressor>
-Decompressor::create_from_type(Type type, FILE* stream)
+Decompressor::create_from_type(Type type, core::Reader& reader)
 {
   switch (type) {
   case compression::Type::none:
-    return std::make_unique<NullDecompressor>(stream);
+    return std::make_unique<NullDecompressor>(reader);
 
   case compression::Type::zstd:
-    return std::make_unique<ZstdDecompressor>(stream);
+    return std::make_unique<ZstdDecompressor>(reader);
   }
 
   ASSERT(false);
index 8d6b173b06b7311a8361904ff15034d3f8c748bf..2223d59e0ce8ea4fa0b4a973a38d4160b52a13b2 100644 (file)
 #pragma once
 
 #include <compression/types.hpp>
+#include <core/Reader.hpp>
 
-#include <cstdio>
 #include <memory>
 
 namespace compression {
 
-class Decompressor
+class Decompressor : public core::Reader
 {
 public:
   virtual ~Decompressor() = default;
 
   // Create a decompressor for the specified type.
-  //
-  // Parameters:
-  // - type: The type.
-  // - stream: The stream to read from.
   static std::unique_ptr<Decompressor> create_from_type(Type type,
-                                                        FILE* stream);
-
-  // Read data into a buffer from the compressed stream.
-  //
-  // Parameters:
-  // - data: Buffer to write decompressed data to.
-  // - count: How many bytes to write.
-  //
-  // Throws Error on failure.
-  virtual void read(void* data, size_t count) = 0;
+                                                        core::Reader& reader);
 
   // Finalize decompression.
   //
index 4d5ee67a5afee5f1972b2563f4dd3e168ff016f0..a2cb76517ce481e96139bdea4418a27f8fe093ba 100644 (file)
@@ -22,7 +22,7 @@
 
 namespace compression {
 
-NullCompressor::NullCompressor(FILE* const stream) : m_stream(stream)
+NullCompressor::NullCompressor(core::Writer& writer) : m_writer(writer)
 {
 }
 
@@ -35,17 +35,13 @@ NullCompressor::actual_compression_level() const
 void
 NullCompressor::write(const void* const data, const size_t count)
 {
-  if (fwrite(data, 1, count, m_stream) != count) {
-    throw core::Error("failed to write to uncompressed stream");
-  }
+  m_writer.write(data, count);
 }
 
 void
 NullCompressor::finalize()
 {
-  if (fflush(m_stream) != 0) {
-    throw core::Error("failed to finalize uncompressed stream");
-  }
+  m_writer.finalize();
 }
 
 } // namespace compression
index 2f26c7b03ef25ef426a909ead229885a3666b493..154b568c75c42f7ece7133af5e12db4c111f33f0 100644 (file)
 
 #include <NonCopyable.hpp>
 
-#include <cstdio>
-
 namespace compression {
 
 // A compressor of an uncompressed stream.
 class NullCompressor : public Compressor, NonCopyable
 {
 public:
-  // Parameters:
-  // - stream: The file to write data to.
-  explicit NullCompressor(FILE* stream);
+  explicit NullCompressor(core::Writer& writer);
 
   int8_t actual_compression_level() const override;
   void write(const void* data, size_t count) override;
   void finalize() override;
 
 private:
-  FILE* m_stream;
+  core::Writer& m_writer;
 };
 
 } // namespace compression
index f25da6354cc331070eea23f22d9dfe9f84e78516..81693e4c28e14a4022d0ed7bbb34ce0c673e7786 100644 (file)
 
 namespace compression {
 
-NullDecompressor::NullDecompressor(FILE* const stream) : m_stream(stream)
+NullDecompressor::NullDecompressor(core::Reader& reader) : m_reader(reader)
 {
 }
 
-void
+size_t
 NullDecompressor::read(void* const data, const size_t count)
 {
-  if (fread(data, count, 1, m_stream) != 1) {
-    throw core::Error("failed to read from uncompressed stream");
-  }
+  return m_reader.read(data, count);
 }
 
 void
 NullDecompressor::finalize()
 {
-  if (fgetc(m_stream) != EOF) {
-    throw core::Error("garbage data at end of uncompressed stream");
+  bool eof;
+  try {
+    m_reader.read_int<uint8_t>();
+    eof = false;
+  } catch (core::Error&) {
+    eof = true;
+  }
+  if (!eof) {
+    throw core::Error("Garbage data at end of uncompressed stream");
   }
 }
 
index 722319d86fdfcda27f0d5b05466a83250a220959..c2c8f5debc3bff678af45f184599ca86ca1addcd 100644 (file)
 
 #include <NonCopyable.hpp>
 
-#include <cstdio>
-
 namespace compression {
 
 // A decompressor of an uncompressed stream.
 class NullDecompressor : public Decompressor, NonCopyable
 {
 public:
-  // Parameters:
-  // - stream: The file to read data from.
-  explicit NullDecompressor(FILE* stream);
+  explicit NullDecompressor(core::Reader& reader);
 
-  void read(void* data, size_t count) override;
+  size_t read(void* data, size_t count) override;
   void finalize() override;
 
 private:
-  FILE* m_stream;
+  core::Reader& m_reader;
 };
 
 } // namespace compression
index fffefd1e8bb909bb856cc9794f8f3e33d37c5bbc..73abe3b07f4442e204da2d4027b3241401e8fbde 100644 (file)
@@ -29,8 +29,8 @@
 
 namespace compression {
 
-ZstdCompressor::ZstdCompressor(FILE* const stream, int8_t compression_level)
-  : m_stream(stream),
+ZstdCompressor::ZstdCompressor(core::Writer& writer, int8_t compression_level)
+  : m_writer(writer),
     m_zstd_stream(ZSTD_createCStream()),
     m_zstd_in(std::make_unique<ZSTD_inBuffer_s>()),
     m_zstd_out(std::make_unique<ZSTD_outBuffer_s>())
@@ -94,9 +94,8 @@ ZstdCompressor::write(const void* const data, const size_t count)
     ret = ZSTD_compressStream(m_zstd_stream, m_zstd_out.get(), m_zstd_in.get());
     ASSERT(!(ZSTD_isError(ret)));
     const size_t compressed_bytes = m_zstd_out->pos;
-    if (fwrite(buffer, 1, compressed_bytes, m_stream) != compressed_bytes
-        || ferror(m_stream)) {
-      throw core::Error("failed to write to zstd output stream ");
+    if (compressed_bytes > 0) {
+      m_writer.write(buffer, compressed_bytes);
     }
   }
   ret = flush;
@@ -107,9 +106,8 @@ ZstdCompressor::write(const void* const data, const size_t count)
     m_zstd_out->pos = 0;
     ret = ZSTD_endStream(m_zstd_stream, m_zstd_out.get());
     const size_t compressed_bytes = m_zstd_out->pos;
-    if (fwrite(buffer, 1, compressed_bytes, m_stream) != compressed_bytes
-        || ferror(m_stream)) {
-      throw core::Error("failed to write to zstd output stream");
+    if (compressed_bytes > 0) {
+      m_writer.write(buffer, compressed_bytes);
     }
   }
 }
@@ -118,6 +116,7 @@ void
 ZstdCompressor::finalize()
 {
   write(nullptr, 0);
+  m_writer.finalize();
 }
 
 } // namespace compression
index 3a1c18e5e7232f535a2c1d3f18f28de0bbd90c12..efb6bfc08a0765528d511d252a53a59357463fce 100644 (file)
@@ -35,10 +35,7 @@ namespace compression {
 class ZstdCompressor : public Compressor, NonCopyable
 {
 public:
-  // Parameters:
-  // - stream: The file to write data to.
-  // - compression_level: Desired compression level.
-  ZstdCompressor(FILE* stream, int8_t compression_level);
+  ZstdCompressor(core::Writer& writer, int8_t compression_level);
 
   ~ZstdCompressor() override;
 
@@ -49,7 +46,7 @@ public:
   constexpr static uint8_t default_compression_level = 1;
 
 private:
-  FILE* m_stream;
+  core::Writer& m_writer;
   ZSTD_CCtx_s* m_zstd_stream;
   std::unique_ptr<ZSTD_inBuffer_s> m_zstd_in;
   std::unique_ptr<ZSTD_outBuffer_s> m_zstd_out;
index 916a6e8fe1af3c6122689fa9f8a3cc57cd34b025..9ceab28ba44a7721b717b75ee3ca14788ff0c14b 100644 (file)
@@ -24,8 +24,8 @@
 
 namespace compression {
 
-ZstdDecompressor::ZstdDecompressor(FILE* const stream)
-  : m_stream(stream),
+ZstdDecompressor::ZstdDecompressor(core::Reader& reader)
+  : m_reader(reader),
     m_input_size(0),
     m_input_consumed(0),
     m_zstd_stream(ZSTD_createDStream()),
@@ -43,17 +43,14 @@ ZstdDecompressor::~ZstdDecompressor()
   ZSTD_freeDStream(m_zstd_stream);
 }
 
-void
+size_t
 ZstdDecompressor::read(void* const data, const size_t count)
 {
   size_t bytes_read = 0;
   while (bytes_read < count) {
     ASSERT(m_input_size >= m_input_consumed);
     if (m_input_size == m_input_consumed) {
-      m_input_size = fread(m_input_buffer, 1, sizeof(m_input_buffer), m_stream);
-      if (m_input_size == 0) {
-        throw core::Error("failed to read from zstd input stream");
-      }
+      m_input_size = m_reader.read(m_input_buffer, sizeof(m_input_buffer));
       m_input_consumed = 0;
     }
 
@@ -67,7 +64,7 @@ ZstdDecompressor::read(void* const data, const size_t count)
     const size_t ret =
       ZSTD_decompressStream(m_zstd_stream, &m_zstd_out, &m_zstd_in);
     if (ZSTD_isError(ret)) {
-      throw core::Error("failed to read from zstd input stream");
+      throw core::Error("Failed to read from zstd input stream");
     }
     if (ret == 0) {
       m_reached_stream_end = true;
@@ -76,13 +73,15 @@ ZstdDecompressor::read(void* const data, const size_t count)
     bytes_read += m_zstd_out.pos;
     m_input_consumed += m_zstd_in.pos;
   }
+
+  return count;
 }
 
 void
 ZstdDecompressor::finalize()
 {
   if (!m_reached_stream_end) {
-    throw core::Error("garbage data at end of zstd input stream");
+    throw core::Error("Garbage data at end of zstd input stream");
   }
 }
 
index aee51b6d2f7e2cdad374c8c0537edff8fba6e0f0..0006ec40725156a0fa253bb8e60616a125114b4b 100644 (file)
@@ -30,17 +30,15 @@ namespace compression {
 class ZstdDecompressor : public Decompressor
 {
 public:
-  // Parameters:
-  // - stream: The file to read data from.
-  explicit ZstdDecompressor(FILE* stream);
+  explicit ZstdDecompressor(core::Reader& reader);
 
   ~ZstdDecompressor() override;
 
-  void read(void* data, size_t count) override;
+  size_t read(void* data, size_t count) override;
   void finalize() override;
 
 private:
-  FILE* m_stream;
+  core::Reader& m_reader;
   char m_input_buffer[CCACHE_READ_BUFFER_SIZE];
   size_t m_input_size;
   size_t m_input_consumed;
index 60d42973b7138de90bb25e114b2bead6861b20eb..060881203ec342087e4eacd6a8a84878b8dde560 100644 (file)
@@ -1,5 +1,8 @@
 set(
   sources
+  ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryHeader.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryReader.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/CacheEntryWriter.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/Statistics.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/StatisticsCounters.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/StatsLog.cpp
diff --git a/src/core/CacheEntryHeader.cpp b/src/core/CacheEntryHeader.cpp
new file mode 100644 (file)
index 0000000..e29ce80
--- /dev/null
@@ -0,0 +1,95 @@
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "CacheEntryHeader.hpp"
+
+#include <fmtmacros.hpp>
+
+// Combined size in bytes of the fixed-width header fields, including the
+// one-byte length prefixes of the ccache version and tag strings. The string
+// data itself is not included here.
+const size_t k_static_header_fields_size =
+  sizeof(core::CacheEntryHeader::magic)
+  + sizeof(core::CacheEntryHeader::entry_format_version)
+  + sizeof(core::CacheEntryHeader::entry_type)
+  + sizeof(core::CacheEntryHeader::compression_type)
+  + sizeof(core::CacheEntryHeader::compression_level)
+  + sizeof(core::CacheEntryHeader::creation_time)
+  + sizeof(core::CacheEntryHeader::entry_size)
+  // Length prefix of the ccache_version string:
+  + sizeof(uint8_t)
+  // Length prefix of the tag string:
+  + sizeof(uint8_t);
+
+// Size in bytes of the epilogue: two uint64_t values.
+const size_t k_static_epilogue_fields_size = 2 * sizeof(uint64_t);
+
+namespace core {
+
+// Construct a header from the given field values.
+//
+// `magic` and `entry_format_version` are not parameters; they are always
+// initialized from k_ccache_magic and k_entry_format_version.
+CacheEntryHeader::CacheEntryHeader(const core::CacheEntryType entry_type_,
+                                   const compression::Type compression_type_,
+                                   const int8_t compression_level_,
+                                   const uint64_t creation_time_,
+                                   const std::string& ccache_version_,
+                                   const std::string& tag_,
+                                   const uint64_t entry_size_)
+  : magic(k_ccache_magic),
+    entry_format_version(k_entry_format_version),
+    entry_type(entry_type_),
+    compression_type(compression_type_),
+    compression_level(compression_level_),
+    creation_time(creation_time_),
+    ccache_version(ccache_version_),
+    tag(tag_),
+    entry_size(entry_size_)
+{
+}
+
+// Return the entry size minus the size of everything that is not payload
+// (header fields and epilogue).
+//
+// NOTE(review): there is no check that entry_size >= non_payload_size(); the
+// unsigned subtraction wraps around if it is not.
+uint64_t
+CacheEntryHeader::payload_size() const
+{
+  const uint64_t overhead = non_payload_size();
+  return entry_size - overhead;
+}
+
+// Set entry_size to `payload_size` plus the size of the header and epilogue.
+//
+// The header size depends on the lengths of ccache_version and tag as they are
+// at the time of the call.
+void
+CacheEntryHeader::set_entry_size_from_payload_size(const uint64_t payload_size)
+{
+  const uint64_t overhead = non_payload_size();
+  entry_size = overhead + payload_size;
+}
+
+// Print all header fields to `stream`, one "Name: value" line per field.
+void
+CacheEntryHeader::dump(FILE* const stream) const
+{
+  // The compression type is printed by name rather than by numeric value.
+  const auto compression_type_name =
+    compression::type_to_string(compression_type);
+
+  PRINT(stream, "Magic: {:04x}\n", magic);
+  PRINT(stream, "Entry format version: {}\n", entry_format_version);
+  PRINT(stream, "Entry type: {}\n", entry_type);
+  PRINT(stream, "Compression type: {}\n", compression_type_name);
+  PRINT(stream, "Compression level: {}\n", compression_level);
+  PRINT(stream, "Creation time: {}\n", creation_time);
+  PRINT(stream, "Ccache version: {}\n", ccache_version);
+  PRINT(stream, "Tag: {}\n", tag);
+  PRINT(stream, "Entry size: {}\n", entry_size);
+}
+
+// Return the number of entry bytes that are not payload: the fixed-width
+// header fields, the ccache version and tag string data, and the epilogue.
+size_t
+CacheEntryHeader::non_payload_size() const
+{
+  const size_t variable_header_size = ccache_version.length() + tag.length();
+  return k_static_header_fields_size + variable_header_size
+         + k_static_epilogue_fields_size;
+}
+
+} // namespace core
diff --git a/src/core/CacheEntryHeader.hpp b/src/core/CacheEntryHeader.hpp
new file mode 100644 (file)
index 0000000..cbbb7b4
--- /dev/null
@@ -0,0 +1,85 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <compression/types.hpp>
+#include <core/types.hpp>
+
+// Cache entry format
+// ==================
+//
+// Integers are big-endian.
+//
+// <entry>            ::= <header> <payload> <epilogue>
+// <header>           ::= <magic> <format_ver> <entry_type> <compr_type>
+//                        <compr_level> <creation_time> <ccache_ver> <tag>
+//                        <entry_size>
+// <magic>            ::= uint16_t (0xccac)
+// <format_ver>       ::= uint8_t
+// <entry_type>       ::= <result_entry> | <manifest_entry>
+// <result_entry>     ::= 0 (uint8_t)
+// <manifest_entry>   ::= 1 (uint8_t)
+// <compr_type>       ::= <compr_none> | <compr_zstd>
+// <compr_none>       ::= 0 (uint8_t)
+// <compr_zstd>       ::= 1 (uint8_t)
+// <compr_level>      ::= int8_t
+// <creation_time>    ::= uint64_t (Unix epoch time when entry was created)
+// <ccache_ver>       ::= string length (uint8_t) + string data
+// <tag>              ::= string length (uint8_t) + string data
+// <entry_size>       ::= uint64_t ; = size of file if stored uncompressed
+// ; potentially compressed from here
+// <payload>          ::= depends on entry_type
+// <epilogue>         ::= <checksum_high> <checksum_low>
+// <checksum_high>    ::= uint64_t ; XXH3-128 (high bits) of entry bytes
+// <checksum_low>     ::= uint64_t ; XXH3-128 (low bits) of entry bytes
+
+namespace core {
+
+// Magic number identifying a ccache cache entry (the <magic> field).
+const uint16_t k_ccache_magic = 0xccac;
+// Version of the outer entry format (the <format_ver> field). The type matches
+// CacheEntryHeader::entry_format_version and the one-byte on-disk field.
+const uint8_t k_entry_format_version = 0;
+
+// In-memory representation of the <header> part of a cache entry, as described
+// in the format comment above.
+struct CacheEntryHeader
+{
+  // `entry_size` defaults to 0; it can be filled in afterwards with
+  // set_entry_size_from_payload_size().
+  CacheEntryHeader(core::CacheEntryType entry_type,
+                   compression::Type compression_type,
+                   int8_t compression_level,
+                   uint64_t creation_time,
+                   const std::string& ccache_version,
+                   const std::string& tag,
+                   uint64_t entry_size = 0);
+
+  // Fields are declared in the order they are written by CacheEntryWriter,
+  // except that entry_size is written last.
+  uint16_t magic;
+  uint8_t entry_format_version;
+  core::CacheEntryType entry_type;
+  compression::Type compression_type;
+  int8_t compression_level;
+  // Unix epoch time when the entry was created.
+  uint64_t creation_time;
+  // Stored with a one-byte length prefix.
+  std::string ccache_version;
+  // Stored with a one-byte length prefix.
+  std::string tag;
+  // Size of the whole entry (header + payload + epilogue) if stored
+  // uncompressed.
+  uint64_t entry_size;
+
+  // entry_size minus the size of the header and epilogue.
+  uint64_t payload_size() const;
+  // Set entry_size to `payload_size` plus the size of the header and epilogue.
+  void set_entry_size_from_payload_size(uint64_t payload_size);
+  // Print all fields to `stream` in human-readable form.
+  void dump(FILE* stream) const;
+
+private:
+  // Size of the header (including string data) plus the epilogue.
+  size_t non_payload_size() const;
+};
+
+} // namespace core
diff --git a/src/core/CacheEntryReader.cpp b/src/core/CacheEntryReader.cpp
new file mode 100644 (file)
index 0000000..26ae366
--- /dev/null
@@ -0,0 +1,108 @@
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "CacheEntryReader.hpp"
+
+#include <core/exceptions.hpp>
+
+namespace {
+
+// Map the raw entry type byte read from a cache entry header to the
+// corresponding core::CacheEntryType value.
+//
+// Throws core::Error if the value is neither 0 (result) nor 1 (manifest).
+core::CacheEntryType
+cache_entry_type_from_int(const uint8_t entry_type)
+{
+  if (entry_type == 0) {
+    return core::CacheEntryType::result;
+  }
+  if (entry_type == 1) {
+    return core::CacheEntryType::manifest;
+  }
+  throw core::Error("Unknown entry type: {}", entry_type);
+}
+
+} // namespace
+
+namespace core {
+
+// Read and validate the cache entry header from `reader`, then set up
+// decompression for the remainder of the entry.
+//
+// Throws core::Error on a bad magic value, an unknown entry format version or
+// an unknown entry type.
+CacheEntryReader::CacheEntryReader(core::Reader& reader)
+  : m_checksumming_reader(reader)
+{
+  // Magic and format version are checked before any other field is read.
+  const auto magic = m_checksumming_reader.read_int<uint16_t>();
+  if (magic != core::k_ccache_magic) {
+    throw core::Error("Bad magic value: 0x{:04x}", magic);
+  }
+
+  const auto entry_format_version = m_checksumming_reader.read_int<uint8_t>();
+  if (entry_format_version != core::k_entry_format_version) {
+    throw core::Error("Unknown entry format version: {}", entry_format_version);
+  }
+
+  // The remaining header fields are read in on-disk order. The two strings are
+  // each preceded by a one-byte length.
+  const auto entry_type = m_checksumming_reader.read_int<uint8_t>();
+  const auto compression_type = m_checksumming_reader.read_int<uint8_t>();
+  const auto compression_level = m_checksumming_reader.read_int<int8_t>();
+  const auto creation_time = m_checksumming_reader.read_int<uint64_t>();
+  const auto ccache_version =
+    m_checksumming_reader.read_str(m_checksumming_reader.read_int<uint8_t>());
+  const auto tag =
+    m_checksumming_reader.read_str(m_checksumming_reader.read_int<uint8_t>());
+  const auto entry_size = m_checksumming_reader.read_int<uint64_t>();
+
+  m_header = std::make_unique<CacheEntryHeader>(
+    cache_entry_type_from_int(entry_type),
+    compression::type_from_int(compression_type),
+    compression_level,
+    creation_time,
+    ccache_version,
+    tag,
+    entry_size);
+
+  // The decompressor reads from the underlying `reader` directly, and the
+  // checksumming reader is re-pointed at the decompressor.
+  // NOTE(review): presumably this makes the checksum cover the decompressed
+  // bytes from here on -- confirm against ChecksummingReader::set_reader.
+  m_decompressor = compression::Decompressor::create_from_type(
+    m_header->compression_type, reader);
+  m_checksumming_reader.set_reader(*m_decompressor);
+}
+
+// Read `count` bytes into `data` via the checksumming reader and return
+// whatever that reader returns.
+size_t
+CacheEntryReader::read(void* const data, const size_t count)
+{
+  return m_checksumming_reader.read(data, count);
+}
+
+// Compare the digest computed while reading with the one stored after the
+// payload, then finalize the decompressor.
+//
+// Throws core::Error if both digests are non-null and they differ.
+void
+CacheEntryReader::finalize()
+{
+  const util::XXH3_128::Digest computed = m_checksumming_reader.digest();
+
+  // The stored digest is read from the decompressor directly, bypassing the
+  // checksumming reader.
+  util::XXH3_128::Digest stored;
+  m_decompressor->read(stored.bytes(), stored.size());
+
+  // A null (default-constructed) digest on either side disables verification:
+  // - computed == null_digest: Checksumming is not enabled now.
+  // - stored == null_digest: Checksumming was not enabled when the entry was
+  //   created.
+  const util::XXH3_128::Digest null_digest;
+  const bool verification_enabled =
+    computed != null_digest && stored != null_digest;
+
+  if (verification_enabled && computed != stored) {
+    throw core::Error("Incorrect checksum (actual {}, expected {})",
+                      Util::format_base16(computed.bytes(), computed.size()),
+                      Util::format_base16(stored.bytes(), stored.size()));
+  }
+
+  m_decompressor->finalize();
+}
+
+} // namespace core
diff --git a/src/core/CacheEntryReader.hpp b/src/core/CacheEntryReader.hpp
new file mode 100644 (file)
index 0000000..a4e5181
--- /dev/null
@@ -0,0 +1,61 @@
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <compression/Decompressor.hpp>
+#include <core/CacheEntryHeader.hpp>
+#include <core/ChecksummingReader.hpp>
+#include <core/Reader.hpp>
+#include <util/XXH3_128.hpp>
+
+namespace core {
+
+// This class knows how to read a cache entry with a format described in
+// CacheEntryHeader.
+class CacheEntryReader : public Reader
+{
+public:
+  // Read cache entry data from `reader`.
+  //
+  // The header is read and validated on construction; `core::Error` is thrown
+  // on a bad magic value, unknown entry format version or unknown entry type.
+  CacheEntryReader(Reader& reader);
+
+  // Read payload data through the checksumming reader.
+  size_t read(void* data, size_t count) override;
+  using Reader::read;
+
+  // Close for reading.
+  //
+  // This method potentially verifies the end state after reading the cache
+  // entry and throws `core::Error` if any integrity issues are found.
+  void finalize();
+
+  // The header that was read on construction.
+  const CacheEntryHeader& header() const;
+
+private:
+  // Checksumming is handled entirely by m_checksumming_reader, so no separate
+  // checksum state is kept here.
+  ChecksummingReader m_checksumming_reader;
+  std::unique_ptr<CacheEntryHeader> m_header;
+  std::unique_ptr<compression::Decompressor> m_decompressor;
+};
+
+// Return the header that the constructor read and stored in m_header.
+inline const CacheEntryHeader&
+CacheEntryReader::header() const
+{
+  return *m_header;
+}
+
+} // namespace core
diff --git a/src/core/CacheEntryWriter.cpp b/src/core/CacheEntryWriter.cpp
new file mode 100644 (file)
index 0000000..b09b4c8
--- /dev/null
@@ -0,0 +1,61 @@
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "CacheEntryWriter.hpp"
+
+#include <core/CacheEntryHeader.hpp>
+
+namespace core {
+
+// Create a writer that emits a cache entry described by `header` to `writer`.
+//
+// The header fields are written right away, in the order below, directly to
+// `writer` through the checksumming writer, so they are covered by the
+// checksum. Afterwards the checksumming writer is redirected to the
+// compressor (which was created on top of the same `writer`), so everything
+// passed to `write()` later goes through `m_compressor`.
+CacheEntryWriter::CacheEntryWriter(core::Writer& writer,
+                                   const CacheEntryHeader& header)
+  : m_checksumming_writer(writer),
+    m_compressor(compression::Compressor::create_from_type(
+      header.compression_type, writer, header.compression_level))
+{
+  m_checksumming_writer.write_int(header.magic);
+  m_checksumming_writer.write_int(header.entry_format_version);
+  m_checksumming_writer.write_int(static_cast<uint8_t>(header.entry_type));
+  m_checksumming_writer.write_int(
+    static_cast<uint8_t>(header.compression_type));
+  m_checksumming_writer.write_int(header.compression_level);
+  m_checksumming_writer.write_int(header.creation_time);
+  // Strings are stored as a one-byte length followed by the string bytes.
+  // NOTE(review): the length is narrowed to uint8_t, so a string longer than
+  // 255 bytes would get a wrapped length prefix while all of its bytes are
+  // still written -- confirm that callers guarantee short strings.
+  m_checksumming_writer.write_int<uint8_t>(header.ccache_version.length());
+  m_checksumming_writer.write_str(header.ccache_version);
+  m_checksumming_writer.write_int<uint8_t>(header.tag.length());
+  m_checksumming_writer.write_str(header.tag);
+  m_checksumming_writer.write_int(header.entry_size);
+
+  // From here on, checksummed data is routed through the compressor.
+  m_checksumming_writer.set_writer(*m_compressor);
+}
+
+// Pass `count` bytes from `data` on to the checksumming writer.
+void
+CacheEntryWriter::write(const void* const data, const size_t count)
+{
+  auto& sink = m_checksumming_writer;
+  sink.write(data, count);
+}
+
+// Finish the cache entry: append the digest computed by the checksumming
+// writer and then finalize the compressor.
+void
+CacheEntryWriter::finalize()
+{
+  // The digest bytes are handed to the compressor directly, i.e. they do not
+  // pass through the checksumming writer.
+  const util::XXH3_128::Digest checksum = m_checksumming_writer.digest();
+  compression::Compressor& compressor = *m_compressor;
+  compressor.write(checksum.bytes(), checksum.size());
+  compressor.finalize();
+}
+
+} // namespace core
diff --git a/src/core/CacheEntryWriter.hpp b/src/core/CacheEntryWriter.hpp
new file mode 100644 (file)
index 0000000..789eb0c
--- /dev/null
@@ -0,0 +1,50 @@
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <compression/Compressor.hpp>
+#include <core/ChecksummingWriter.hpp>
+#include <core/Writer.hpp>
+
+namespace core {
+
+struct CacheEntryHeader;
+
+// This class knows how to write a cache entry with a format described in
+// CacheEntryHeader.
+//
+// It is itself a `Writer`, so it can be handed to code written against the
+// generic `Writer` interface.
+class CacheEntryWriter : public Writer
+{
+public:
+  // Write a cache entry described by `header` to `writer`.
+  CacheEntryWriter(Writer& writer, const CacheEntryHeader& header);
+
+  // Write `count` bytes of entry data from `data`.
+  void write(const void* data, size_t count) override;
+  using Writer::write;
+
+  // Close for writing.
+  //
+  // Appends the checksum of the written data and finalizes the underlying
+  // compressor. See `Writer::finalize` for the error contract.
+  void finalize() override;
+
+private:
+  // Declared before `m_compressor`; members are initialized in this order.
+  ChecksummingWriter m_checksumming_writer;
+  std::unique_ptr<compression::Compressor> m_compressor;
+};
+
+} // namespace core
diff --git a/src/core/ChecksummingReader.hpp b/src/core/ChecksummingReader.hpp
new file mode 100644 (file)
index 0000000..41c569f
--- /dev/null
@@ -0,0 +1,68 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <core/Reader.hpp>
+#include <util/XXH3_128.hpp>
+
+namespace core {
+
+// A `Reader` that forwards reads to another reader and feeds every byte it
+// returns into a `util::XXH3_128` checksum.
+class ChecksummingReader : public Reader
+{
+public:
+  // Start reading from `reader`. Only a pointer to `reader` is stored.
+  ChecksummingReader(core::Reader& reader);
+
+  using core::Reader::read;
+  // Read from the current underlying reader and add the bytes that were
+  // actually read to the checksum. Returns the number of bytes read.
+  size_t read(void* data, size_t count) override;
+
+  // Switch to reading from `reader`. The checksum state is left untouched.
+  void set_reader(core::Reader& reader);
+
+  // Return the digest of the bytes read so far.
+  util::XXH3_128::Digest digest() const;
+
+private:
+  core::Reader* m_reader;    // Current source; not owned.
+  util::XXH3_128 m_checksum; // Running checksum of the bytes read.
+};
+
+// Remember the address of `reader` as the initial source.
+inline ChecksummingReader::ChecksummingReader(core::Reader& reader)
+  : m_reader{&reader}
+{
+}
+
+// Read from the underlying reader and hash exactly the bytes it delivered,
+// which may be fewer than `count`.
+inline size_t
+ChecksummingReader::read(void* const data, const size_t count)
+{
+  const size_t delivered = m_reader->read(data, count);
+  m_checksum.update(data, delivered);
+  return delivered;
+}
+
+// Redirect subsequent reads to `reader`. Only the pointer is replaced; the
+// checksum accumulated so far is kept.
+inline void
+ChecksummingReader::set_reader(core::Reader& reader)
+{
+  m_reader = &reader;
+}
+
+// Return the digest of the bytes read so far.
+inline util::XXH3_128::Digest
+ChecksummingReader::digest() const
+{
+  util::XXH3_128::Digest current = m_checksum.digest();
+  return current;
+}
+
+} // namespace core
diff --git a/src/core/ChecksummingWriter.hpp b/src/core/ChecksummingWriter.hpp
new file mode 100644 (file)
index 0000000..1d6b009
--- /dev/null
@@ -0,0 +1,74 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <core/Writer.hpp>
+#include <util/XXH3_128.hpp>
+
+namespace core {
+
+// A `Writer` that forwards writes to another writer and feeds every written
+// byte into a `util::XXH3_128` checksum.
+class ChecksummingWriter : public Writer
+{
+public:
+  // Start writing to `writer`. Only a pointer to `writer` is stored.
+  ChecksummingWriter(core::Writer& writer);
+
+  using core::Writer::write;
+  // Write `count` bytes from `data` to the current underlying writer and add
+  // them to the checksum.
+  void write(const void* data, size_t count) override;
+  // Finalize the current underlying writer.
+  void finalize() override;
+
+  // Switch to writing to `writer`. The checksum state is left untouched.
+  void set_writer(core::Writer& writer);
+
+  // Return the digest of the bytes written so far.
+  util::XXH3_128::Digest digest() const;
+
+private:
+  core::Writer* m_writer;    // Current destination; not owned.
+  util::XXH3_128 m_checksum; // Running checksum of the bytes written.
+};
+
+// Remember the address of `writer` as the initial destination.
+inline ChecksummingWriter::ChecksummingWriter(core::Writer& writer)
+  : m_writer{&writer}
+{
+}
+
+// Write `count` bytes from `data` to the underlying writer, then add them to
+// the checksum.
+inline void
+ChecksummingWriter::write(const void* const data, const size_t count)
+{
+  // The write happens first, so if it throws the checksum is not updated.
+  m_writer->write(data, count);
+  m_checksum.update(data, count);
+}
+
+// Finalize the current underlying writer. The checksum itself is not written
+// anywhere by this method; callers obtain it via `digest()`.
+inline void
+ChecksummingWriter::finalize()
+{
+  m_writer->finalize();
+}
+
+// Redirect subsequent writes to `writer`. Only the pointer is replaced; the
+// checksum accumulated so far is kept.
+inline void
+ChecksummingWriter::set_writer(core::Writer& writer)
+{
+  m_writer = &writer;
+}
+
+// Return the digest of the bytes written so far.
+inline util::XXH3_128::Digest
+ChecksummingWriter::digest() const
+{
+  util::XXH3_128::Digest current = m_checksum.digest();
+  return current;
+}
+
+} // namespace core
diff --git a/src/core/FileReader.hpp b/src/core/FileReader.hpp
new file mode 100644 (file)
index 0000000..c7c7b8c
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <core/Reader.hpp>
+#include <core/exceptions.hpp>
+
+#include <cstdio>
+
+namespace core {
+
+// A `Reader` that reads from a C stdio stream.
+//
+// The stream pointer is only stored; this class declares no destructor, so
+// closing the stream is left to whoever opened it.
+class FileReader : public Reader
+{
+public:
+  // Read from `stream`.
+  FileReader(FILE* stream);
+
+  // Read up to `size` bytes into `data` and return the number of bytes read.
+  // Throws `core::Error` if `size` is non-zero and no bytes could be read.
+  size_t read(void* data, size_t size) override;
+
+private:
+  FILE* m_stream;
+};
+
+// Store `stream` as the source to read from.
+inline FileReader::FileReader(FILE* stream)
+  : m_stream{stream}
+{
+}
+
+// Read up to `size` bytes from the stream into `data`.
+//
+// A zero-byte request returns 0 without touching the stream. Otherwise the
+// number of bytes delivered by fread() is returned (possibly fewer than
+// `size`), and `core::Error` is thrown if nothing at all could be read.
+inline size_t
+FileReader::read(void* const data, const size_t size)
+{
+  size_t bytes_read = 0;
+  if (size != 0) {
+    bytes_read = fread(data, 1, size, m_stream);
+    if (bytes_read == 0) {
+      throw core::Error("Failed to read from file stream");
+    }
+  }
+  return bytes_read;
+}
+
+} // namespace core
diff --git a/src/core/FileWriter.hpp b/src/core/FileWriter.hpp
new file mode 100644 (file)
index 0000000..f4cdc0b
--- /dev/null
@@ -0,0 +1,58 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <core/Writer.hpp>
+#include <core/exceptions.hpp>
+
+#include <cstdio>
+
+namespace core {
+
+// A `Writer` that writes to a C stdio stream.
+//
+// The stream pointer is only stored; this class declares no destructor, so
+// closing the stream is left to whoever opened it.
+class FileWriter : public Writer
+{
+public:
+  // Write to `stream`.
+  FileWriter(FILE* stream);
+
+  // Write `size` bytes from `data`. Throws `core::Error` if a non-empty write
+  // fails.
+  void write(const void* data, size_t size) override;
+  // Flush the stream.
+  void finalize() override;
+
+private:
+  FILE* m_stream;
+};
+
+// Store `stream` as the destination to write to.
+inline FileWriter::FileWriter(FILE* const stream)
+  : m_stream{stream}
+{
+}
+
+// Write `size` bytes from `data` to the stream as a single item.
+//
+// An empty write is a no-op. Throws `core::Error` if fwrite() does not report
+// the item as written.
+inline void
+FileWriter::write(const void* const data, const size_t size)
+{
+  if (size == 0) {
+    return;
+  }
+  const size_t items_written = fwrite(data, size, 1, m_stream);
+  if (items_written != 1) {
+    throw core::Error("Failed to write to stream");
+  }
+}
+
+// Flush buffered data to the stream.
+//
+// Throws `core::Error` if the flush fails, as `Writer::finalize` promises, so
+// that write errors which only surface when the stdio buffer is drained are
+// not silently dropped.
+inline void
+FileWriter::finalize()
+{
+  if (fflush(m_stream) != 0) {
+    throw core::Error("Failed to flush stream");
+  }
+}
+
+} // namespace core
diff --git a/src/core/Reader.hpp b/src/core/Reader.hpp
new file mode 100644 (file)
index 0000000..a352b9a
--- /dev/null
@@ -0,0 +1,79 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <Util.hpp>
+#include <core/exceptions.hpp>
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+namespace core {
+
+// Abstract byte source. Concrete readers implement `read`; the integer and
+// string helpers below are built on top of it.
+class Reader
+{
+public:
+  virtual ~Reader() = default;
+
+  // Read `count` bytes into `data`, returning the actual number of bytes read
+  // if not enough data is available. Throws `core::Error` on failure, e.g. if
+  // no bytes could be read.
+  virtual size_t read(void* data, size_t count) = 0;
+
+  // Read an integer stored in big-endian byte order. Throws Error on failure,
+  // including when fewer than sizeof(T) bytes could be read.
+  template<typename T> T read_int();
+
+  // Read an integer into `value`. Throws Error on failure.
+  template<typename T> void read_int(T& value);
+
+  // Read a string of length `length`. Throws `core::Error` on failure.
+  std::string read_str(size_t length);
+};
+
+// Read sizeof(T) bytes and convert them from big-endian byte order to T.
+// Throws `core::Error` ("Read underflow") if fewer bytes were available.
+template<typename T>
+inline T
+Reader::read_int()
+{
+  uint8_t raw[sizeof(T)];
+  if (read(raw, sizeof(T)) != sizeof(T)) {
+    throw core::Error("Read underflow");
+  }
+  T result;
+  Util::big_endian_to_int(raw, result);
+  return result;
+}
+
+// Out-parameter variant of read_int<T>(): `value` is only assigned once the
+// integer has been read successfully.
+template<typename T>
+inline void
+Reader::read_int(T& value)
+{
+  const T result = read_int<T>();
+  value = result;
+}
+
+// Read exactly `length` bytes and return them as a string.
+//
+// Throws `core::Error` ("Read underflow") if the underlying reader delivers
+// fewer than `length` bytes, mirroring read_int(). Without this check a short
+// read would silently yield a string padded with NUL bytes.
+inline std::string
+Reader::read_str(const size_t length)
+{
+  std::string value(length, 0);
+  const auto bytes_read = read(&value[0], length);
+  if (bytes_read != length) {
+    throw core::Error("Read underflow");
+  }
+  return value;
+}
+
+} // namespace core
diff --git a/src/core/Writer.hpp b/src/core/Writer.hpp
new file mode 100644 (file)
index 0000000..7474a5e
--- /dev/null
@@ -0,0 +1,64 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <Util.hpp>
+#include <assertions.hpp>
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+namespace core {
+
+// Abstract byte sink. Concrete writers implement `write` and `finalize`; the
+// integer and string helpers below are built on top of `write`.
+class Writer
+{
+public:
+  virtual ~Writer() = default;
+
+  // Write `count` bytes from `data`. Throws `core::Error` on failure.
+  virtual void write(const void* data, size_t count) = 0;
+
+  // Write integer `value` as sizeof(T) bytes in big-endian byte order. Throws
+  // `core::Error` on failure.
+  template<typename T> void write_int(T value);
+
+  // Write the bytes of `value`; no length prefix or terminator is added.
+  // Throws `core::Error` on failure.
+  void write_str(const std::string& value);
+
+  // Finalize writing, e.g. flush written bytes and potentially check for error
+  // states. Throws `core::Error` on failure.
+  virtual void finalize() = 0;
+};
+
+// Convert `value` to big-endian byte order and write its sizeof(T) bytes.
+template<typename T>
+inline void
+Writer::write_int(const T value)
+{
+  constexpr size_t width = sizeof(T);
+  uint8_t bytes[width];
+  Util::int_to_big_endian(value, bytes);
+  write(bytes, width);
+}
+
+// Write the raw bytes of `value`, without any length prefix or terminator.
+inline void
+Writer::write_str(const std::string& value)
+{
+  const size_t byte_count = value.size();
+  write(value.data(), byte_count);
+}
+
+} // namespace core
index 35aa2cc86fe39653fa5a32f4bc7c4c32e2fc998f..7bb3c0bc50c78abcbcb35207f7dceaadc2343eec 100644 (file)
@@ -34,7 +34,7 @@
 #include <storage/Storage.hpp>
 #include <storage/primary/PrimaryStorage.hpp>
 #include <util/TextTable.hpp>
-#include <util/XXH3_64.hpp>
+#include <util/XXH3_128.hpp>
 #include <util/expected.hpp>
 #include <util/string.hpp>
 
@@ -126,7 +126,7 @@ Options for secondary storage:
                                --trim-dir; default: atime
 
 Options for scripting or debugging:
-        --checksum-file PATH   print the checksum (64 bit XXH3) of the file at
+        --checksum-file PATH   print the checksum (128 bit XXH3) of the file at
                                PATH
         --dump-manifest PATH   dump manifest file at PATH in text format
         --dump-result PATH     dump result file at PATH in text format
@@ -369,12 +369,13 @@ process_main_options(int argc, const char* const* argv)
       break;
 
     case CHECKSUM_FILE: {
-      util::XXH3_64 checksum;
+      util::XXH3_128 checksum;
       Fd fd(arg == "-" ? STDIN_FILENO : open(arg.c_str(), O_RDONLY));
       Util::read_fd(*fd, [&checksum](const void* data, size_t size) {
         checksum.update(data, size);
       });
-      PRINT(stdout, "{:016x}\n", checksum.digest());
+      const auto digest = checksum.digest();
+      PRINT(stdout, "{}\n", Util::format_base16(digest.bytes(), digest.size()));
       break;
     }
 
index d4fbf6659941309158e00e48d08e008eb52261e4..4ee52fa398db9418c859b861a0b503f865d001be 100644 (file)
 
 #pragma once
 
+#include <cstdint>
+
 namespace core {
 
-enum class CacheEntryType { result, manifest };
+enum class CacheEntryType : uint8_t { result = 0, manifest = 1 };
 
 } // namespace core
index 50f697bafd08ecbd08dea3998234bd9d136e6b9e..497bfcb9c343e0dfcaaaa935f34298525684888f 100644 (file)
@@ -392,10 +392,10 @@ get_shard_url(const Digest& key,
   double highest_score = -1.0;
   std::string best_shard;
   for (const auto& shard_config : shards) {
-    util::XXH3_64 checksum;
-    checksum.update(key.bytes(), key.size());
-    checksum.update(shard_config.name.data(), shard_config.name.length());
-    const double score = to_half_open_unit_interval(checksum.digest());
+    util::XXH3_64 hash;
+    hash.update(key.bytes(), key.size());
+    hash.update(shard_config.name.data(), shard_config.name.length());
+    const double score = to_half_open_unit_interval(hash.digest());
     ASSERT(score >= 0.0 && score < 1.0);
     const double weighted_score =
       score == 0.0 ? 0.0 : shard_config.weight / -std::log(score);
index 892a27b21e141b3ea1ae5c8ff9f2bcbb20174a49..6b40567ec43bac53bcf3956dcd10a6d68d7666c0 100644 (file)
@@ -19,8 +19,6 @@
 #include "PrimaryStorage.hpp"
 
 #include <AtomicFile.hpp>
-#include <CacheEntryReader.hpp>
-#include <CacheEntryWriter.hpp>
 #include <Context.hpp>
 #include <File.hpp>
 #include <Logging.hpp>
 #include <ThreadPool.hpp>
 #include <assertions.hpp>
 #include <compression/ZstdCompressor.hpp>
+#include <core/CacheEntryReader.hpp>
+#include <core/CacheEntryWriter.hpp>
+#include <core/FileReader.hpp>
+#include <core/FileWriter.hpp>
 #include <core/exceptions.hpp>
 #include <core/wincompat.hpp>
 #include <fmtmacros.hpp>
@@ -124,41 +126,20 @@ open_file(const std::string& path, const char* const mode)
   return f;
 }
 
-static std::unique_ptr<CacheEntryReader>
-create_reader(const CacheFile& cache_file, FILE* const stream)
+static std::unique_ptr<core::CacheEntryReader>
+create_reader(const CacheFile& cache_file, core::Reader& reader)
 {
   if (cache_file.type() == CacheFile::Type::unknown) {
     throw core::Error("unknown file type for {}", cache_file.path());
   }
 
-  switch (cache_file.type()) {
-  case CacheFile::Type::result:
-    return std::make_unique<CacheEntryReader>(
-      stream, Result::k_magic, Result::k_version);
-
-  case CacheFile::Type::manifest:
-    return std::make_unique<CacheEntryReader>(
-      stream, Manifest::k_magic, Manifest::k_version);
-
-  case CacheFile::Type::unknown:
-    ASSERT(false); // Handled at function entry.
-  }
-
-  ASSERT(false);
+  return std::make_unique<core::CacheEntryReader>(reader);
 }
 
-static std::unique_ptr<CacheEntryWriter>
-create_writer(FILE* const stream,
-              const CacheEntryReader& reader,
-              const compression::Type compression_type,
-              const int8_t compression_level)
+static std::unique_ptr<core::CacheEntryWriter>
+create_writer(core::Writer& writer, const core::CacheEntryHeader& header)
 {
-  return std::make_unique<CacheEntryWriter>(stream,
-                                            reader.magic(),
-                                            reader.version(),
-                                            compression_type,
-                                            compression_level,
-                                            reader.payload_size());
+  return std::make_unique<core::CacheEntryWriter>(writer, header);
 }
 
 static void
@@ -168,17 +149,18 @@ recompress_file(RecompressionStatistics& statistics,
                 const nonstd::optional<int8_t> level)
 {
   auto file = open_file(cache_file.path(), "rb");
-  auto reader = create_reader(cache_file, file.get());
+  core::FileReader file_reader(file.get());
+  auto reader = create_reader(cache_file, file_reader);
 
   const auto old_stat = Stat::stat(cache_file.path(), Stat::OnError::log);
-  const uint64_t content_size = reader->content_size();
+  const uint64_t content_size = reader->header().entry_size;
   const int8_t wanted_level =
     level
       ? (*level == 0 ? compression::ZstdCompressor::default_compression_level
                      : *level)
       : 0;
 
-  if (reader->compression_level() == wanted_level) {
+  if (reader->header().compression_level == wanted_level) {
     statistics.update(content_size, old_stat.size(), old_stat.size(), 0);
     return;
   }
@@ -187,14 +169,15 @@ recompress_file(RecompressionStatistics& statistics,
       cache_file.path(),
       level ? FMT("level {}", wanted_level) : "uncompressed");
   AtomicFile atomic_new_file(cache_file.path(), AtomicFile::Mode::binary);
-  auto writer =
-    create_writer(atomic_new_file.stream(),
-                  *reader,
-                  level ? compression::Type::zstd : compression::Type::none,
-                  wanted_level);
+  core::FileWriter file_writer(atomic_new_file.stream());
+  auto header = reader->header();
+  header.compression_type =
+    level ? compression::Type::zstd : compression::Type::none;
+  header.compression_level = wanted_level;
+  auto writer = create_writer(file_writer, header);
 
   char buffer[CCACHE_READ_BUFFER_SIZE];
-  size_t bytes_left = reader->payload_size();
+  size_t bytes_left = reader->header().payload_size();
   while (bytes_left > 0) {
     size_t bytes_to_read = std::min(bytes_left, sizeof(buffer));
     reader->read(buffer, bytes_to_read);
@@ -237,9 +220,10 @@ PrimaryStorage::get_compression_statistics(
 
         try {
           auto file = open_file(cache_file.path(), "rb");
-          auto reader = create_reader(cache_file, file.get());
+          core::FileReader file_reader(file.get());
+          auto reader = create_reader(cache_file, file_reader);
           cs.compr_size += cache_file.lstat().size();
-          cs.content_size += reader->content_size();
+          cs.content_size += reader->header().entry_size;
         } catch (core::Error&) {
           cs.incompr_size += cache_file.lstat().size();
         }
index 6f87596986514d4fef6e9cd00b5a8b912dd3c578..d733fa9cdb9a2d72d18fba12a67e70c1e7fa4605 100644 (file)
@@ -22,6 +22,8 @@
 #include <compression/Compressor.hpp>
 #include <compression/Decompressor.hpp>
 #include <compression/types.hpp>
+#include <core/FileReader.hpp>
+#include <core/FileWriter.hpp>
 
 #include "third_party/doctest.h"
 
@@ -38,15 +40,17 @@ TEST_CASE("compression::Type::none roundtrip")
   TestContext test_context;
 
   File f("data.uncompressed", "w");
+  core::FileWriter fw(f.get());
   auto compressor =
-    Compressor::create_from_type(compression::Type::none, f.get(), 1);
+    Compressor::create_from_type(compression::Type::none, fw, 1);
   CHECK(compressor->actual_compression_level() == 0);
   compressor->write("foobar", 6);
   compressor->finalize();
 
   f.open("data.uncompressed", "r");
+  core::FileReader fr(f.get());
   auto decompressor =
-    Decompressor::create_from_type(compression::Type::none, f.get());
+    Decompressor::create_from_type(compression::Type::none, fr);
 
   char buffer[4];
   decompressor->read(buffer, 4);
@@ -56,7 +60,7 @@ TEST_CASE("compression::Type::none roundtrip")
   {
     // Not reached the end.
     CHECK_THROWS_WITH(decompressor->finalize(),
-                      "garbage data at end of uncompressed stream");
+                      "Garbage data at end of uncompressed stream");
   }
 
   SUBCASE("Read to end")
@@ -69,7 +73,7 @@ TEST_CASE("compression::Type::none roundtrip")
 
     // Nothing left to read.
     CHECK_THROWS_WITH(decompressor->read(buffer, 1),
-                      "failed to read from uncompressed stream");
+                      "Failed to read from file stream");
   }
 }
 
index fd06dc4e8425d4189d06055923001bf711e4f0d5..59c169f02d63e93648b3dd1f9873b7a7e9abbe07 100644 (file)
@@ -22,6 +22,8 @@
 #include <compression/Compressor.hpp>
 #include <compression/Decompressor.hpp>
 #include <compression/types.hpp>
+#include <core/FileReader.hpp>
+#include <core/FileWriter.hpp>
 
 #include "third_party/doctest.h"
 
@@ -38,15 +40,17 @@ TEST_CASE("Small compression::Type::zstd roundtrip")
   TestContext test_context;
 
   File f("data.zstd", "wb");
+  core::FileWriter fw(f.get());
   auto compressor =
-    Compressor::create_from_type(compression::Type::zstd, f.get(), 1);
+    Compressor::create_from_type(compression::Type::zstd, fw, 1);
   CHECK(compressor->actual_compression_level() == 1);
   compressor->write("foobar", 6);
   compressor->finalize();
 
   f.open("data.zstd", "rb");
+  core::FileReader fr(f.get());
   auto decompressor =
-    Decompressor::create_from_type(compression::Type::zstd, f.get());
+    Decompressor::create_from_type(compression::Type::zstd, fr);
 
   char buffer[4];
   decompressor->read(buffer, 4);
@@ -54,7 +58,7 @@ TEST_CASE("Small compression::Type::zstd roundtrip")
 
   // Not reached the end.
   CHECK_THROWS_WITH(decompressor->finalize(),
-                    "garbage data at end of zstd input stream");
+                    "Garbage data at end of zstd input stream");
 
   decompressor->read(buffer, 2);
   CHECK(memcmp(buffer, "ar", 2) == 0);
@@ -64,7 +68,7 @@ TEST_CASE("Small compression::Type::zstd roundtrip")
 
   // Nothing left to read.
   CHECK_THROWS_WITH(decompressor->read(buffer, 1),
-                    "failed to read from zstd input stream");
+                    "Failed to read from file stream");
 }
 
 TEST_CASE("Large compressible compression::Type::zstd roundtrip")
@@ -74,16 +78,18 @@ TEST_CASE("Large compressible compression::Type::zstd roundtrip")
   char data[] = "The quick brown fox jumps over the lazy dog";
 
   File f("data.zstd", "wb");
+  core::FileWriter fw(f.get());
   auto compressor =
-    Compressor::create_from_type(compression::Type::zstd, f.get(), 1);
+    Compressor::create_from_type(compression::Type::zstd, fw, 1);
   for (size_t i = 0; i < 1000; i++) {
     compressor->write(data, sizeof(data));
   }
   compressor->finalize();
 
   f.open("data.zstd", "rb");
+  core::FileReader fr(f.get());
   auto decompressor =
-    Decompressor::create_from_type(compression::Type::zstd, f.get());
+    Decompressor::create_from_type(compression::Type::zstd, fr);
 
   char buffer[sizeof(data)];
   for (size_t i = 0; i < 1000; i++) {
@@ -96,7 +102,7 @@ TEST_CASE("Large compressible compression::Type::zstd roundtrip")
 
   // Nothing left to read.
   CHECK_THROWS_WITH(decompressor->read(buffer, 1),
-                    "failed to read from zstd input stream");
+                    "Failed to read from file stream");
 }
 
 TEST_CASE("Large uncompressible compression::Type::zstd roundtrip")
@@ -109,14 +115,16 @@ TEST_CASE("Large uncompressible compression::Type::zstd roundtrip")
   }
 
   File f("data.zstd", "wb");
+  core::FileWriter fw(f.get());
   auto compressor =
-    Compressor::create_from_type(compression::Type::zstd, f.get(), 1);
+    Compressor::create_from_type(compression::Type::zstd, fw, 1);
   compressor->write(data, sizeof(data));
   compressor->finalize();
 
   f.open("data.zstd", "rb");
+  core::FileReader fr(f.get());
   auto decompressor =
-    Decompressor::create_from_type(compression::Type::zstd, f.get());
+    Decompressor::create_from_type(compression::Type::zstd, fr);
 
   char buffer[sizeof(data)];
   decompressor->read(buffer, sizeof(buffer));