From: Joel Rosdahl Date: Wed, 14 Sep 2022 19:19:14 +0000 (+0200) Subject: feat: Merge local manifest with fetched remote manifest X-Git-Tag: v4.7~51 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f48b80c3f2e9153a98b876ad6e37c75d603d1918;p=thirdparty%2Fccache.git feat: Merge local manifest with fetched remote manifest With read-only secondary storage, it can happen that primary storage has a manifest named M with a result entry R1, while secondary storage also has a manifest M but with result R2. On a compilation that matches R2, ccache will first successfully look up M in primary storage, fail to find R2 there, and then get M from secondary storage, where R2 can be found. Since M already exists locally, ccache will simply return the cache hit but not store knowledge of R2 locally. On a rebuild of R2, ccache therefore needs to fetch from secondary storage again. The improvement brought by this commit is that ccache now merges the manifests from primary and secondary storage and stores the merged version in primary storage. In other words, ccache setups with read-only secondary storage will be able to accumulate local header file combinations and seamlessly combine them with changes from secondary storage. Closes #1049. 
--- diff --git a/src/ccache.cpp b/src/ccache.cpp index 8edb551ce..7bd3e4f3d 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -1730,10 +1730,12 @@ hash_direct_mode_specific_data(Context& ctx, manifest_key = hash.digest(); MTR_BEGIN("manifest", "manifest_get"); + size_t read_manifests = 0; ctx.storage.get( *manifest_key, core::CacheEntryType::manifest, [&](util::Bytes&& value) { try { read_manifest(ctx, value); + ++read_manifests; result_key = ctx.manifest.look_up_result_digest(ctx); } catch (const core::Error& e) { LOG("Failed to look up result key in manifest: {}", e.what()); @@ -1747,6 +1749,14 @@ hash_direct_mode_specific_data(Context& ctx, } }); MTR_END("manifest", "manifest_get"); + if (read_manifests > 1) { + MTR_SCOPE("manifest", "merge"); + LOG("Storing merged manifest {} locally", manifest_key->to_string()); + core::CacheEntry::Header header(ctx.config, core::CacheEntryType::manifest); + ctx.storage.primary.put(*manifest_key, + core::CacheEntryType::manifest, + core::CacheEntry::serialize(header, ctx.manifest)); + } return {}; } diff --git a/src/core/Manifest.cpp b/src/core/Manifest.cpp index 060fd930e..7922be7c9 100644 --- a/src/core/Manifest.cpp +++ b/src/core/Manifest.cpp @@ -81,7 +81,9 @@ const uint8_t Manifest::k_format_version = 0; void Manifest::read(nonstd::span data) { - clear(); + std::vector files; + std::vector file_infos; + std::vector results; core::CacheEntryDataReader reader(data); @@ -94,13 +96,13 @@ Manifest::read(nonstd::span data) const auto file_count = reader.read_int(); for (uint32_t i = 0; i < file_count; ++i) { - m_files.emplace_back(reader.read_str(reader.read_int())); + files.emplace_back(reader.read_str(reader.read_int())); } const auto file_info_count = reader.read_int(); for (uint32_t i = 0; i < file_info_count; ++i) { - m_file_infos.emplace_back(); - auto& entry = m_file_infos.back(); + file_infos.emplace_back(); + auto& entry = file_infos.back(); reader.read_int(entry.index); 
reader.read_and_copy_bytes({entry.digest.bytes(), Digest::size()}); @@ -111,8 +113,8 @@ Manifest::read(nonstd::span data) const auto result_count = reader.read_int(); for (uint32_t i = 0; i < result_count; ++i) { - m_results.emplace_back(); - auto& entry = m_results.back(); + results.emplace_back(); + auto& entry = results.back(); const auto file_info_index_count = reader.read_int(); for (uint32_t j = 0; j < file_info_index_count; ++j) { @@ -120,6 +122,27 @@ Manifest::read(nonstd::span data) } reader.read_and_copy_bytes({entry.key.bytes(), Digest::size()}); } + + if (m_results.empty()) { + m_files = std::move(files); + m_file_infos = std::move(file_infos); + m_results = std::move(results); + } else { + for (const auto& result : results) { + std::unordered_map included_files; + std::unordered_map included_files_stats; + for (auto file_info_index : result.file_info_indexes) { + const auto& file_info = file_infos[file_info_index]; + included_files.emplace(files[file_info.index], file_info.digest); + included_files_stats.emplace( + files[file_info.index], + FileStats{file_info.fsize, file_info.mtime, file_info.ctime}); + } + add_result(result.key, included_files, [&](const std::string& path) { + return included_files_stats[path]; + }); + } + } } std::optional diff --git a/src/storage/primary/PrimaryStorage.cpp b/src/storage/primary/PrimaryStorage.cpp index c9e00ee80..32c7f581e 100644 --- a/src/storage/primary/PrimaryStorage.cpp +++ b/src/storage/primary/PrimaryStorage.cpp @@ -211,7 +211,8 @@ PrimaryStorage::put(const Digest& key, const auto cache_file = look_up_cache_file(key, type); if (only_if_missing && cache_file.stat) { - LOG("Not storing {} since it already exists", cache_file.path); + LOG("Not storing {} in primary storage since it already exists", + cache_file.path); return; } diff --git a/test/suites/secondary_file.bash b/test/suites/secondary_file.bash index 9d958c99c..b67bcf66f 100644 --- a/test/suites/secondary_file.bash +++ 
b/test/suites/secondary_file.bash @@ -292,4 +292,131 @@ SUITE_secondary_file() { expect_stat secondary_storage_miss 0 expect_file_count 2 '*' secondary # CACHEDIR.TAG + manifest, not result fi + + # ------------------------------------------------------------------------- + TEST "Manifest handling" + + echo 'int x;' >test.h + backdate test.h + echo '#include "test.h"' >test.c + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 1 + expect_stat primary_storage_hit 0 + expect_stat primary_storage_miss 2 # miss: manifest + result + expect_stat secondary_storage_hit 0 + expect_stat secondary_storage_miss 2 # miss: manifest + result + + # Both primary and secondary now have an "int x;" key in the manifest. + + echo 'int y;' >test.h + backdate test.h + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 1 # hit: manifest without key + expect_stat primary_storage_miss 3 # miss: result + expect_stat secondary_storage_hit 1 # hit: manifest without key + expect_stat secondary_storage_miss 3 # miss: result + + # Both primary and secondary now have "int x;" and "int y;" keys in the manifest. + + $CCACHE -C >/dev/null + + # Now only secondary has "int x;" and "int y;" keys in the manifest. We + # should now be able to get secondary hit without involving primary. + + echo 'int x;' >test.h + backdate test.h + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 1 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 1 + expect_stat primary_storage_miss 5 # miss: manifest + result + expect_stat secondary_storage_hit 3 # hit: manifest + result + expect_stat secondary_storage_miss 3 + + # Should be able to get secondary hit without involving primary. 
+ + echo 'int y;' >test.h + backdate test.h + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 2 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 2 # hit: manifest with key (downloaded from previous step) + expect_stat primary_storage_miss 6 # miss: result + expect_stat secondary_storage_hit 4 # hit: result + expect_stat secondary_storage_miss 3 + + # ------------------------------------------------------------------------- + TEST "Manifest merging" + + echo 'int x;' >test.h + backdate test.h + echo '#include "test.h"' >test.c + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 1 + expect_stat primary_storage_hit 0 + expect_stat primary_storage_miss 2 # miss: manifest + result + expect_stat secondary_storage_hit 0 + expect_stat secondary_storage_miss 2 # miss: manifest + result + + $CCACHE -C >/dev/null + + # Now secondary has an "int x;" key in the manifest and primary has none. + + echo 'int y;' >test.h + backdate test.h + + CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 0 + expect_stat primary_storage_miss 4 # miss: manifest + result + expect_stat secondary_storage_hit 0 + expect_stat secondary_storage_miss 2 + + # Now primary has "int y;" while secondary still has "int x;". + + echo 'int x;' >test.h + backdate test.h + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 1 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 1 # hit: manifest without key + expect_stat primary_storage_miss 5 # miss: result + expect_stat secondary_storage_hit 2 # hit: manifest + result + expect_stat secondary_storage_miss 2 + + # Primary's manifest with "int y;" was merged with secondary's "int x;" + # above, so we should now be able to get "int x;" and "int y;" hits locally. 
+ + echo 'int y;' >test.h + backdate test.h + + CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 2 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 3 # hit: manifest + result + expect_stat primary_storage_miss 5 + expect_stat secondary_storage_hit 2 + expect_stat secondary_storage_miss 2 + + echo 'int x;' >test.h + backdate test.h + + CCACHE_SECONDARY_STORAGE= $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 3 + expect_stat cache_miss 2 + expect_stat primary_storage_hit 5 # hit: manifest + result + expect_stat primary_storage_miss 5 + expect_stat secondary_storage_hit 2 + expect_stat secondary_storage_miss 2 }