From: Joel Rosdahl Date: Mon, 12 Sep 2022 12:06:45 +0000 (+0200) Subject: refactor(storage): Pass cache entries via memory instead of files X-Git-Tag: v4.7~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d663cb51088f28b921262a54aea6e1f0d809182f;p=thirdparty%2Fccache.git refactor(storage): Pass cache entries via memory instead of files --- diff --git a/src/Context.hpp b/src/Context.hpp index 80aa64187..a2e1ec0a7 100644 --- a/src/Context.hpp +++ b/src/Context.hpp @@ -30,6 +30,7 @@ # include "InodeCache.hpp" #endif +#include #include #include @@ -92,6 +93,9 @@ public: // Storage (fronting primary and secondary storage backends). storage::Storage storage; + // Direct mode manifest. + core::Manifest manifest; + #ifdef INODE_CACHE_SUPPORTED // InodeCache that caches source file hashes when enabled. mutable InodeCache inode_cache; diff --git a/src/ccache.cpp b/src/ccache.cpp index 4a10147e3..8197c1c97 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -752,39 +752,22 @@ do_execute(Context& ctx, Args& args, const bool capture_stdout = true) return DoExecuteResult{status, stdout_data, *stderr_data_result}; } -static core::Manifest -read_manifest(const std::string& path) +static void +read_manifest(Context& ctx, nonstd::span cache_entry_data) { - core::Manifest manifest; try { - const auto cache_entry_data = - util::value_or_throw(util::read_file(path)); core::CacheEntry cache_entry(cache_entry_data); cache_entry.verify_checksum(); - manifest.read(cache_entry.payload()); + ctx.manifest.read(cache_entry.payload()); } catch (const core::Error& e) { - LOG("Error reading {}: {}", path, e.what()); + LOG("Error reading manifest: {}", e.what()); } - return manifest; } static void -save_manifest(const Config& config, - core::Manifest& manifest, - const std::string& path) -{ - core::CacheEntry::Header header(config, core::CacheEntryType::manifest); - const auto cache_entry_data = core::CacheEntry::serialize(header, manifest); - AtomicFile atomic_manifest_file(path, AtomicFile::Mode::binary); - atomic_manifest_file.write(cache_entry_data); - atomic_manifest_file.commit(); -} - -// Create or update the manifest file. -static void -update_manifest_file(Context& ctx, - const Digest& manifest_key, - const Digest& result_key) +update_manifest(Context& ctx, + const Digest& manifest_key, + const Digest& result_key) { if (ctx.config.read_only() || ctx.config.read_only_direct()) { return; @@ -800,24 +783,17 @@ update_manifest_file(Context& ctx, (ctx.config.sloppiness().is_enabled(core::Sloppy::file_stat_matches)) || ctx.args_info.output_is_precompiled_header; - ctx.storage.put( - manifest_key, core::CacheEntryType::manifest, [&](const auto& path) { - LOG("Adding result key to {}", path); - try { - auto manifest = read_manifest(path); - const bool added = manifest.add_result(result_key, - ctx.included_files, - ctx.time_of_compilation, - save_timestamp); - if (added) { - save_manifest(ctx.config, manifest, path); - } - return added; - } catch (const core::Error& e) { - LOG("Failed to add result key to {}: {}", path, e.what()); - return false; - } - }); + const bool added = ctx.manifest.add_result( + result_key, ctx.included_files, ctx.time_of_compilation, save_timestamp); + if (added) { + core::CacheEntry::Header header(ctx.config, core::CacheEntryType::manifest); + ctx.storage.put(manifest_key, + core::CacheEntryType::manifest, + core::CacheEntry::serialize(header, ctx.manifest)); + LOG("Added result key to manifest {}", manifest_key.to_string()); + } else { + LOG("Did not add result key to manifest {}", manifest_key.to_string()); + } } struct FindCoverageFileResult @@ -860,7 +836,7 @@ find_coverage_file(const Context& ctx) static bool write_result(Context& ctx, - const std::string& result_path, + const Digest& result_key, const Stat& obj_stat, const std::string& stdout_data, const std::string& stderr_data) @@ -914,36 +890,11 @@ write_result(Context& ctx, core::CacheEntry::Header header(ctx.config, core::CacheEntryType::result); const auto cache_entry_data = core::CacheEntry::serialize(header, serializer); - const auto raw_files = serializer.get_raw_files(); if (!raw_files.empty()) { - Util::ensure_dir_exists(Util::dir_name(result_path)); + ctx.storage.primary.put_raw_files(result_key, raw_files); } - for (auto [file_number, source_path] : raw_files) { - const auto dest_path = storage::primary::PrimaryStorage::get_raw_file_path( - result_path, file_number); - const auto old_stat = Stat::stat(dest_path); - try { - Util::clone_hard_link_or_copy_file( - ctx.config, source_path, dest_path, true); - } catch (core::Error& e) { - LOG("Failed to store {} as raw file {}: {}", - source_path, - dest_path, - e.what()); - throw; - } - const auto new_stat = Stat::stat(dest_path); - ctx.storage.primary.increment_statistic( - Statistic::cache_size_kibibyte, - Util::size_change_kibibyte(old_stat, new_stat)); - ctx.storage.primary.increment_statistic( - Statistic::files_in_cache, (new_stat ? 1 : 0) - (old_stat ? 1 : 0)); - } - - AtomicFile atomic_result_file(result_path, AtomicFile::Mode::binary); - atomic_result_file.write(cache_entry_data); - atomic_result_file.commit(); + ctx.storage.put(result_key, core::CacheEntryType::result, cache_entry_data); return true; } @@ -1115,20 +1066,9 @@ to_cache(Context& ctx, } MTR_BEGIN("result", "result_put"); - const bool added = ctx.storage.put( - *result_key, core::CacheEntryType::result, [&](const auto& path) { - try { - return write_result( - ctx, path, obj_stat, result->stdout_data, result->stderr_data); - } catch (core::Error& e) { - LOG("Error writing to {}: {}", path, e.what()); - return false; - } - }); + write_result( + ctx, *result_key, obj_stat, result->stdout_data, result->stderr_data); MTR_END("result", "result_put"); - if (!added) { - return nonstd::make_unexpected(Statistic::internal_error); - } // Everything OK. Util::send_to_fd(ctx, result->stderr_data, STDERR_FILENO); @@ -1771,18 +1711,17 @@ hash_direct_mode_specific_data(Context& ctx, manifest_key = hash.digest(); - auto manifest_path = ctx.storage.get(*manifest_key, - core::CacheEntryType::manifest, - storage::Storage::Mode::primary_only); + auto cache_entry_data = ctx.storage.get(*manifest_key, + core::CacheEntryType::manifest, + storage::Storage::Mode::primary_only); - if (manifest_path) { - LOG("Looking for result key in {}", *manifest_path); + if (cache_entry_data) { MTR_BEGIN("manifest", "manifest_get"); try { - const auto manifest = read_manifest(*manifest_path); - result_key = manifest.look_up_result_digest(ctx); + read_manifest(ctx, *cache_entry_data); + result_key = ctx.manifest.look_up_result_digest(ctx); } catch (const core::Error& e) { - LOG("Failed to look up result key in {}: {}", *manifest_path, e.what()); + LOG("Failed to look up result key in manifest: {}", e.what()); } MTR_END("manifest", "manifest_get"); if (result_key) { @@ -1793,18 +1732,17 @@ hash_direct_mode_specific_data(Context& ctx, } // Check secondary storage if not found in primary if (!result_key) { - manifest_path = ctx.storage.get(*manifest_key, - core::CacheEntryType::manifest, - storage::Storage::Mode::secondary_only); - if (manifest_path) { - LOG("Looking for result key in fetched secondary manifest {}", - *manifest_path); + cache_entry_data = ctx.storage.get(*manifest_key, + core::CacheEntryType::manifest, + storage::Storage::Mode::secondary_only); + if (cache_entry_data) { + LOG_RAW("Looking for result key in fetched secondary manifest"); MTR_BEGIN("manifest", "secondary_manifest_get"); try { - const auto manifest = read_manifest(*manifest_path); - result_key = manifest.look_up_result_digest(ctx); + read_manifest(ctx, *cache_entry_data); + result_key = ctx.manifest.look_up_result_digest(ctx); } catch (const core::Error& e) { - LOG("Failed to look up result key in {}: {}", *manifest_path, e.what()); + LOG("Failed to look up result key in manifest: {}", e.what()); } MTR_END("manifest", "secondary_manifest_get"); if (result_key) { @@ -2012,28 +1950,25 @@ from_cache(Context& ctx, FromCacheCallMode mode, const Digest& result_key) MTR_SCOPE("cache", "from_cache"); // Get result from cache. - const auto result_path = + const auto cache_entry_data = ctx.storage.get(result_key, core::CacheEntryType::result); - if (!result_path) { + if (!cache_entry_data) { return false; } try { - const auto cache_entry_data = util::value_or_throw( - util::read_file(*result_path), - FMT("Failed to read {}: ", *result_path)); - - core::CacheEntry cache_entry(cache_entry_data); + core::CacheEntry cache_entry(*cache_entry_data); cache_entry.verify_checksum(); core::Result::Deserializer deserializer(cache_entry.payload()); - core::ResultRetriever result_retriever(ctx, result_path); + core::ResultRetriever result_retriever(ctx, result_key); deserializer.visit(result_retriever); } catch (core::ResultRetriever::WriteError& e) { - LOG( - "Write error when retrieving result from {}: {}", *result_path, e.what()); + LOG("Write error when retrieving result from {}: {}", + result_key.to_string(), + e.what()); return nonstd::make_unexpected(Statistic::bad_output_file); } catch (core::Error& e) { - LOG("Failed to get result from {}: {}", *result_path, e.what()); + LOG("Failed to get result from {}: {}", result_key.to_string(), e.what()); return false; } @@ -2496,7 +2431,8 @@ do_cache_compilation(Context& ctx, const char* const* argv) return nonstd::make_unexpected(from_cache_result.error()); } else if (*from_cache_result) { if (ctx.config.direct_mode() && manifest_key && put_result_in_manifest) { - update_manifest_file(ctx, *manifest_key, *result_key); + MTR_SCOPE("cache", "update_manifest"); + update_manifest(ctx, *manifest_key, *result_key); } return Statistic::preprocessed_cache_hit; } @@ -2529,7 +2465,7 @@ do_cache_compilation(Context& ctx, const char* const* argv) if (ctx.config.direct_mode()) { ASSERT(manifest_key); MTR_SCOPE("cache", "update_manifest"); - update_manifest_file(ctx, *manifest_key, *result_key); + update_manifest(ctx, *manifest_key, *result_key); } return ctx.config.recache() ? Statistic::recache : Statistic::cache_miss; diff --git a/src/core/ResultRetriever.cpp b/src/core/ResultRetriever.cpp index b3506f824..89f4389d8 100644 --- a/src/core/ResultRetriever.cpp +++ b/src/core/ResultRetriever.cpp @@ -44,9 +44,9 @@ namespace core { using Result::FileType; ResultRetriever::ResultRetriever(const Context& ctx, - std::optional path) + std::optional result_key) : m_ctx(ctx), - m_path(path) + m_result_key(result_key) { } @@ -93,11 +93,11 @@ ResultRetriever::on_raw_file(uint8_t file_number, Result::file_type_to_string(file_type), file_size); - if (!m_path) { + if (!m_result_key) { throw core::Error("Raw entry for non-local result"); } const auto raw_file_path = - storage::primary::PrimaryStorage::get_raw_file_path(*m_path, file_number); + m_ctx.storage.primary.get_raw_file_path(*m_result_key, file_number); const auto st = Stat::stat(raw_file_path, Stat::OnError::throw_error); if (st.size() != file_size) { throw core::Error( diff --git a/src/core/ResultRetriever.hpp b/src/core/ResultRetriever.hpp index d2e814ba0..640a761a1 100644 --- a/src/core/ResultRetriever.hpp +++ b/src/core/ResultRetriever.hpp @@ -20,6 +20,7 @@ #include "Fd.hpp" +#include #include #include @@ -41,7 +42,7 @@ public: //`path` should be the path to the local result entry file if the result comes // from primary storage. ResultRetriever(const Context& ctx, - std::optional path = std::nullopt); + std::optional result_key = std::nullopt); void on_embedded_file(uint8_t file_number, Result::FileType file_type, @@ -52,7 +53,7 @@ public: private: const Context& m_ctx; - std::optional m_path; + std::optional m_result_key; std::string get_dest_path(Result::FileType file_type) const; diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp index 90172876f..9382e025f 100644 --- a/src/storage/Storage.cpp +++ b/src/storage/Storage.cpp @@ -43,6 +43,8 @@ #include #include +#include +#include #include #include @@ -208,11 +210,11 @@ Storage::Storage(const Config& config) : primary(config), m_config(config) { } +// Define the destructor in the implementation file to avoid having to declare +// SecondaryStorageEntry and its constituents in the header file. +// NOLINTNEXTLINE(modernize-use-equals-default) Storage::~Storage() { - for (const auto& tmp_file : m_tmp_files) { - Util::unlink_tmp(tmp_file); - } } void @@ -227,7 +229,7 @@ Storage::finalize() primary.finalize(); } -std::optional +std::optional Storage::get(const Digest& key, const core::CacheEntryType type, const Mode mode) @@ -235,28 +237,14 @@ Storage::get(const Digest& key, MTR_SCOPE("storage", "get"); if (mode != Mode::secondary_only) { - auto path = primary.get(key, type); - primary.increment_statistic(path ? core::Statistic::primary_storage_hit - : core::Statistic::primary_storage_miss); - if (path) { + auto value = primary.get(key, type); + primary.increment_statistic(value ? core::Statistic::primary_storage_hit + : core::Statistic::primary_storage_miss); + if (value) { if (m_config.reshare()) { - // Temporary optimization until primary storage API has been refactored - // to pass data via memory instead of files. - const bool should_put_in_secondary_storage = std::any_of( - m_secondary_storages.begin(), - m_secondary_storages.end(), - [](const auto& entry) { return !entry->config.read_only; }); - if (should_put_in_secondary_storage) { - const auto value = util::read_file(*path); - if (!value) { - LOG("Failed to read {}: {}", *path, value.error()); - return path; // Don't indicate failure since primary storage was OK. - } - put_in_secondary_storage(key, *value, true); - } + put_in_secondary_storage(key, *value, true); } - - return path; + return value; } } @@ -264,61 +252,22 @@ Storage::get(const Digest& key, return std::nullopt; } - const auto value = get_from_secondary_storage(key); - if (!value) { - return std::nullopt; + auto value = get_from_secondary_storage(key); + if (value) { + primary.put(key, type, *value); } - - TemporaryFile tmp_file(FMT("{}/tmp.get", m_config.temporary_dir())); - m_tmp_files.push_back(tmp_file.path); - try { - util::write_file(tmp_file.path, *value); - } catch (const core::Error& e) { - throw core::Fatal(FMT("Error writing to {}: {}", tmp_file.path, e.what())); - } - - primary.put(key, type, [&](const auto& path) { - try { - Util::ensure_dir_exists(Util::dir_name(path)); - Util::copy_file(tmp_file.path, path); - } catch (const core::Error& e) { - LOG("Failed to copy {} to {}: {}", tmp_file.path, path, e.what()); - // Don't indicate failure since get from primary storage was OK. - } - return true; - }); - - return tmp_file.path; + return value; } -bool +void Storage::put(const Digest& key, const core::CacheEntryType type, - const storage::EntryWriter& entry_writer) + nonstd::span value) { MTR_SCOPE("storage", "put"); - const auto path = primary.put(key, type, entry_writer); - if (!path) { - return false; - } - - // Temporary optimization until primary storage API has been refactored to - // pass data via memory instead of files. - const bool should_put_in_secondary_storage = - std::any_of(m_secondary_storages.begin(), - m_secondary_storages.end(), - [](const auto& entry) { return !entry->config.read_only; }); - if (should_put_in_secondary_storage) { - const auto value = util::read_file(*path); - if (!value) { - LOG("Failed to read {}: {}", *path, value.error()); - return true; // Don't indicate failure since primary storage was OK. - } - put_in_secondary_storage(key, *value, false); - } - - return true; + primary.put(key, type, value); + put_in_secondary_storage(key, value, false); } void diff --git a/src/storage/Storage.hpp b/src/storage/Storage.hpp index 5dee65a0d..eba46897d 100644 --- a/src/storage/Storage.hpp +++ b/src/storage/Storage.hpp @@ -22,10 +22,10 @@ #include #include #include +#include #include -#include #include #include #include @@ -51,15 +51,14 @@ public: primary::PrimaryStorage primary; - // Returns a path to a file containing the value. enum class Mode { secondary_fallback, secondary_only, primary_only }; - std::optional get(const Digest& key, + std::optional get(const Digest& key, core::CacheEntryType type, const Mode mode = Mode::secondary_fallback); - bool put(const Digest& key, + void put(const Digest& key, core::CacheEntryType type, - const storage::EntryWriter& entry_writer); + nonstd::span value); void remove(const Digest& key, core::CacheEntryType type); @@ -69,7 +68,6 @@ public: private: const Config& m_config; std::vector> m_secondary_storages; - std::vector m_tmp_files; void add_secondary_storages(); diff --git a/src/storage/primary/PrimaryStorage.cpp b/src/storage/primary/PrimaryStorage.cpp index c8e19167e..8a520d3e7 100644 --- a/src/storage/primary/PrimaryStorage.cpp +++ b/src/storage/primary/PrimaryStorage.cpp @@ -18,6 +18,7 @@ #include "PrimaryStorage.hpp" +#include #include #include #include @@ -175,14 +176,19 @@ PrimaryStorage::finalize() } } -std::optional +std::optional PrimaryStorage::get(const Digest& key, const core::CacheEntryType type) const { MTR_SCOPE("primary_storage", "get"); const auto cache_file = look_up_cache_file(key, type); if (!cache_file.stat) { - LOG("No {} in primary storage", key.to_string()); + LOG("No {} {} in primary storage", key.to_string(), core::to_string(type)); + return std::nullopt; + } + const auto value = util::read_file(cache_file.path); + if (!value) { + LOG("Failed to read {}: {}", cache_file.path, value.error()); return std::nullopt; } @@ -191,13 +197,14 @@ PrimaryStorage::get(const Digest& key, const core::CacheEntryType type) const // Update modification timestamp to save file from LRU cleanup. util::set_timestamps(cache_file.path); - return cache_file.path; + + return *value; } -std::optional +void PrimaryStorage::put(const Digest& key, const core::CacheEntryType type, - const storage::EntryWriter& entry_writer) + nonstd::span value) { MTR_SCOPE("primary_storage", "put"); @@ -214,15 +221,21 @@ PrimaryStorage::put(const Digest& key, break; } - if (!entry_writer(cache_file.path)) { - LOG("Did not store {} in primary storage", key.to_string()); - return std::nullopt; + try { + AtomicFile result_file(cache_file.path, AtomicFile::Mode::binary); + result_file.write(value); + result_file.commit(); + } + + catch (core::Error& e) { + LOG("Failed to write to {}: {}", cache_file.path, e.what()); + return; } const auto new_stat = Stat::stat(cache_file.path, Stat::OnError::log); if (!new_stat) { LOG("Failed to stat {}: {}", cache_file.path, strerror(errno)); - return std::nullopt; + return; } LOG("Stored {} in primary storage ({})", key.to_string(), cache_file.path); @@ -240,8 +253,6 @@ PrimaryStorage::put(const Digest& key, // the stat call if we exit early. util::create_cachedir_tag( FMT("{}/{}", m_config.cache_dir(), key.to_string()[0])); - - return cache_file.path; } void @@ -274,6 +285,45 @@ PrimaryStorage::get_raw_file_path(std::string_view result_path, return FMT("{}{}W", prefix, file_number); } +std::string +PrimaryStorage::get_raw_file_path(const Digest& result_key, + uint8_t file_number) const +{ + const auto cache_file = + look_up_cache_file(result_key, core::CacheEntryType::result); + return get_raw_file_path(cache_file.path, file_number); +} + +void +PrimaryStorage::put_raw_files( + const Digest& key, + const std::vector raw_files) +{ + const auto cache_file = look_up_cache_file(key, core::CacheEntryType::result); + Util::ensure_dir_exists(Util::dir_name(cache_file.path)); + + for (auto [file_number, source_path] : raw_files) { + const auto dest_path = get_raw_file_path(cache_file.path, file_number); + const auto old_stat = Stat::stat(dest_path); + try { + Util::clone_hard_link_or_copy_file( + m_config, source_path, dest_path, true); + m_added_raw_files.push_back(dest_path); + } catch (core::Error& e) { + LOG("Failed to store {} as raw file {}: {}", + source_path, + dest_path, + e.what()); + throw; + } + const auto new_stat = Stat::stat(dest_path); + increment_statistic(Statistic::cache_size_kibibyte, + Util::size_change_kibibyte(old_stat, new_stat)); + increment_statistic(Statistic::files_in_cache, + (new_stat ? 1 : 0) - (old_stat ? 1 : 0)); + } +} + void PrimaryStorage::increment_statistic(const Statistic statistic, const int64_t value) @@ -387,6 +437,17 @@ PrimaryStorage::update_stats_and_maybe_move_cache_file( // Two ccache processes may move the file at the same time, so failure // to rename is OK. } + for (const auto& raw_file : m_added_raw_files) { + try { + Util::rename(raw_file, + FMT("{}/{}", + Util::dir_name(wanted_path), + Util::base_name(raw_file))); + } catch (const core::Error&) { + // Two ccache processes may move the file at the same time, so failure + // to rename is OK. + } + } } } return counters; diff --git a/src/storage/primary/PrimaryStorage.hpp b/src/storage/primary/PrimaryStorage.hpp index 7a33198a7..58ed02add 100644 --- a/src/storage/primary/PrimaryStorage.hpp +++ b/src/storage/primary/PrimaryStorage.hpp @@ -19,13 +19,18 @@ #pragma once #include +#include #include #include #include #include +#include + +#include #include #include +#include class Config; @@ -49,18 +54,23 @@ public: // --- Cache entry handling --- - // Returns a path to a file containing the value. - std::optional get(const Digest& key, + std::optional get(const Digest& key, core::CacheEntryType type) const; - std::optional put(const Digest& key, - core::CacheEntryType type, - const storage::EntryWriter& entry_writer); + void put(const Digest& key, + core::CacheEntryType type, + nonstd::span value); void remove(const Digest& key, core::CacheEntryType type); static std::string get_raw_file_path(std::string_view result_path, uint8_t file_number); + std::string get_raw_file_path(const Digest& result_key, + uint8_t file_number) const; + + void + put_raw_files(const Digest& key, + const std::vector raw_files); // --- Statistics --- @@ -113,6 +123,8 @@ private: std::string m_manifest_path; std::string m_result_path; + std::vector m_added_raw_files; + struct LookUpCacheFileResult { std::string path;