git.ipfire.org Git - thirdparty/ccache.git/commitdiff
feat: Merge local manifest with fetched remote manifest
author Joel Rosdahl <joel@rosdahl.net>
Wed, 14 Sep 2022 19:19:14 +0000 (21:19 +0200)
committer Joel Rosdahl <joel@rosdahl.net>
Wed, 21 Sep 2022 15:06:31 +0000 (17:06 +0200)
With read-only secondary storage, it can happen that primary storage has
a manifest named M with a result entry R1, while secondary storage also
has a manifest M but with result R2. On a compilation that matches R2,
ccache will first successfully look up M in primary storage, fail to find
R2 there and then get M from secondary storage, where R2 can be found. Since M
already exists locally, ccache will simply return the cache hit but not
store knowledge of R2 locally. On a rebuild of R2, ccache therefore
needs to fetch from secondary storage again.

The improvement brought by this commit is that ccache now merges the
manifests from primary and secondary storage and stores the merged
version in primary storage. In other words, ccache setups with read-only
secondary storage will be able to accumulate local header file
combinations and seamlessly combine them with changes from secondary
storage.

Closes #1049.

src/ccache.cpp
src/core/Manifest.cpp
src/storage/primary/PrimaryStorage.cpp
test/suites/secondary_file.bash

index 8edb551ce08a8d41da5ce0cf8dae449e903e1a5e..7bd3e4f3d72094df6680371d50a89e81c8ca40e3 100644 (file)
@@ -1730,10 +1730,12 @@ hash_direct_mode_specific_data(Context& ctx,
   manifest_key = hash.digest();
 
   MTR_BEGIN("manifest", "manifest_get");
+  size_t read_manifests = 0;
   ctx.storage.get(
     *manifest_key, core::CacheEntryType::manifest, [&](util::Bytes&& value) {
       try {
         read_manifest(ctx, value);
+        ++read_manifests;
         result_key = ctx.manifest.look_up_result_digest(ctx);
       } catch (const core::Error& e) {
         LOG("Failed to look up result key in manifest: {}", e.what());
@@ -1747,6 +1749,14 @@ hash_direct_mode_specific_data(Context& ctx,
       }
     });
   MTR_END("manifest", "manifest_get");
+  if (read_manifests > 1) {
+    MTR_SCOPE("manifest", "merge");
+    LOG("Storing merged manifest {} locally", manifest_key->to_string());
+    core::CacheEntry::Header header(ctx.config, core::CacheEntryType::manifest);
+    ctx.storage.primary.put(*manifest_key,
+                            core::CacheEntryType::manifest,
+                            core::CacheEntry::serialize(header, ctx.manifest));
+  }
 
   return {};
 }
index 060fd930eda39179eaf127e99663de0c72a6b373..7922be7c94598ffda823b669a1f89bf924d6ce7e 100644 (file)
@@ -81,7 +81,9 @@ const uint8_t Manifest::k_format_version = 0;
 void
 Manifest::read(nonstd::span<const uint8_t> data)
 {
-  clear();
+  std::vector<std::string> files;
+  std::vector<FileInfo> file_infos;
+  std::vector<ResultEntry> results;
 
   core::CacheEntryDataReader reader(data);
 
@@ -94,13 +96,13 @@ Manifest::read(nonstd::span<const uint8_t> data)
 
   const auto file_count = reader.read_int<uint32_t>();
   for (uint32_t i = 0; i < file_count; ++i) {
-    m_files.emplace_back(reader.read_str(reader.read_int<uint16_t>()));
+    files.emplace_back(reader.read_str(reader.read_int<uint16_t>()));
   }
 
   const auto file_info_count = reader.read_int<uint32_t>();
   for (uint32_t i = 0; i < file_info_count; ++i) {
-    m_file_infos.emplace_back();
-    auto& entry = m_file_infos.back();
+    file_infos.emplace_back();
+    auto& entry = file_infos.back();
 
     reader.read_int(entry.index);
     reader.read_and_copy_bytes({entry.digest.bytes(), Digest::size()});
@@ -111,8 +113,8 @@ Manifest::read(nonstd::span<const uint8_t> data)
 
   const auto result_count = reader.read_int<uint32_t>();
   for (uint32_t i = 0; i < result_count; ++i) {
-    m_results.emplace_back();
-    auto& entry = m_results.back();
+    results.emplace_back();
+    auto& entry = results.back();
 
     const auto file_info_index_count = reader.read_int<uint32_t>();
     for (uint32_t j = 0; j < file_info_index_count; ++j) {
@@ -120,6 +122,27 @@ Manifest::read(nonstd::span<const uint8_t> data)
     }
     reader.read_and_copy_bytes({entry.key.bytes(), Digest::size()});
   }
+
+  if (m_results.empty()) {
+    m_files = std::move(files);
+    m_file_infos = std::move(file_infos);
+    m_results = std::move(results);
+  } else {
+    for (const auto& result : results) {
+      std::unordered_map<std::string, Digest> included_files;
+      std::unordered_map<std::string, FileStats> included_files_stats;
+      for (auto file_info_index : result.file_info_indexes) {
+        const auto& file_info = file_infos[file_info_index];
+        included_files.emplace(files[file_info.index], file_info.digest);
+        included_files_stats.emplace(
+          files[file_info.index],
+          FileStats{file_info.fsize, file_info.mtime, file_info.ctime});
+      }
+      add_result(result.key, included_files, [&](const std::string& path) {
+        return included_files_stats[path];
+      });
+    }
+  }
 }
 
 std::optional<Digest>
index c9e00ee80177ed920f77d8f8f0b257f2e724f01c..32c7f581e81483d776bfa6b04ee3f70fd053b412 100644 (file)
@@ -211,7 +211,8 @@ PrimaryStorage::put(const Digest& key,
 
   const auto cache_file = look_up_cache_file(key, type);
   if (only_if_missing && cache_file.stat) {
-    LOG("Not storing {} since it already exists", cache_file.path);
+    LOG("Not storing {} in primary storage since it already exists",
+        cache_file.path);
     return;
   }
 
index 9d958c99c5bd462f5f651176ff4cced0c666d8fa..b67bcf66feb5896d145aa183641a22c0d2a1b2d9 100644 (file)
@@ -292,4 +292,131 @@ SUITE_secondary_file() {
         expect_stat secondary_storage_miss 0
         expect_file_count 2 '*' secondary # CACHEDIR.TAG + manifest, not result
     fi
+
+    # -------------------------------------------------------------------------
+    TEST "Manifest handling"
+
+    echo 'int x;' >test.h
+    backdate test.h
+    echo '#include "test.h"' >test.c
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 0
+    expect_stat cache_miss 1
+    expect_stat primary_storage_hit 0
+    expect_stat primary_storage_miss 2 # miss: manifest + result
+    expect_stat secondary_storage_hit 0
+    expect_stat secondary_storage_miss 2 # miss: manifest + result
+
+    # Both primary and secondary now have an "int x;" key in the manifest.
+
+    echo 'int y;' >test.h
+    backdate test.h
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 0
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 1 # hit: manifest without key
+    expect_stat primary_storage_miss 3 # miss: result
+    expect_stat secondary_storage_hit 1 # hit: manifest without key
+    expect_stat secondary_storage_miss 3 # miss: result
+
+    # Both primary and secondary now have "int x;" and "int y;" keys in the manifest.
+
+    $CCACHE -C >/dev/null
+
+    # Now only secondary has "int x;" and "int y;" keys in the manifest. We
+    # should now be able to get a secondary hit without involving primary.
+
+    echo 'int x;' >test.h
+    backdate test.h
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 1
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 1
+    expect_stat primary_storage_miss 5 # miss: manifest + result
+    expect_stat secondary_storage_hit 3 # hit: manifest + result
+    expect_stat secondary_storage_miss 3
+
+    # Should be able to get a secondary hit without involving primary.
+
+    echo 'int y;' >test.h
+    backdate test.h
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 2
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 2 # hit: manifest with key (downloaded from previous step)
+    expect_stat primary_storage_miss 6 # miss: result
+    expect_stat secondary_storage_hit 4 # hit: result
+    expect_stat secondary_storage_miss 3
+
+    # -------------------------------------------------------------------------
+    TEST "Manifest merging"
+
+    echo 'int x;' >test.h
+    backdate test.h
+    echo '#include "test.h"' >test.c
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 0
+    expect_stat cache_miss 1
+    expect_stat primary_storage_hit 0
+    expect_stat primary_storage_miss 2 # miss: manifest + result
+    expect_stat secondary_storage_hit 0
+    expect_stat secondary_storage_miss 2 # miss: manifest + result
+
+    $CCACHE -C >/dev/null
+
+    # Now secondary has an "int x;" key in the manifest and primary has none.
+
+    echo 'int y;' >test.h
+    backdate test.h
+
+    CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 0
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 0
+    expect_stat primary_storage_miss 4 # miss: manifest + result
+    expect_stat secondary_storage_hit 0
+    expect_stat secondary_storage_miss 2
+
+    # Now primary has "int y;" while secondary still has "int x;".
+
+    echo 'int x;' >test.h
+    backdate test.h
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 1
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 1 # hit: manifest without key
+    expect_stat primary_storage_miss 5 # miss: result
+    expect_stat secondary_storage_hit 2 # hit: manifest + result
+    expect_stat secondary_storage_miss 2
+
+    # Primary's manifest with "int y;" was merged with secondary's "int x;"
+    # above, so we should now be able to get "int x;" and "int y;" hits locally.
+
+    echo 'int y;' >test.h
+    backdate test.h
+
+    CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 2
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 3 # hit: manifest + result
+    expect_stat primary_storage_miss 5
+    expect_stat secondary_storage_hit 2
+    expect_stat secondary_storage_miss 2
+
+    echo 'int x;' >test.h
+    backdate test.h
+
+    CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c
+    expect_stat direct_cache_hit 3
+    expect_stat cache_miss 2
+    expect_stat primary_storage_hit 5 # hit: manifest + result
+    expect_stat primary_storage_miss 5
+    expect_stat secondary_storage_hit 2
+    expect_stat secondary_storage_miss 2
 }