From c7c0837a23fe9dc79613bf3dd4ddd8d91c58d541 Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Tue, 15 Jun 2021 21:13:10 +0200 Subject: [PATCH] Refactor main primary storage backend code into storage/primary This is the code that deals with retrieving and storing cache entries. --- src/CMakeLists.txt | 5 +- src/Context.cpp | 5 +- src/Context.hpp | 86 +--- src/Manifest.cpp | 33 +- src/Manifest.hpp | 2 +- src/Result.cpp | 2 +- src/Util.hpp | 2 +- src/ccache.cpp | 630 ++++++++----------------- src/core/CMakeLists.txt | 6 + src/core/types.hpp | 29 ++ src/storage/CMakeLists.txt | 8 + src/storage/Storage.cpp | 65 +++ src/storage/Storage.hpp | 61 +++ src/storage/primary/CMakeLists.txt | 6 + src/storage/primary/PrimaryStorage.cpp | 386 +++++++++++++++ src/storage/primary/PrimaryStorage.hpp | 102 ++++ src/storage/types.hpp | 33 ++ src/util/CMakeLists.txt | 1 + src/util/file_utils.cpp | 48 ++ src/util/file_utils.hpp | 29 ++ 20 files changed, 993 insertions(+), 546 deletions(-) create mode 100644 src/core/CMakeLists.txt create mode 100644 src/core/types.hpp create mode 100644 src/storage/CMakeLists.txt create mode 100644 src/storage/Storage.cpp create mode 100644 src/storage/Storage.hpp create mode 100644 src/storage/primary/CMakeLists.txt create mode 100644 src/storage/primary/PrimaryStorage.cpp create mode 100644 src/storage/primary/PrimaryStorage.hpp create mode 100644 src/storage/types.hpp create mode 100644 src/util/file_utils.cpp create mode 100644 src/util/file_utils.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0a6e8bfe9..ed64aaa1a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,7 +41,8 @@ set( execute.cpp hashutil.cpp language.cpp - version.cpp) + version.cpp +) if(INODE_CACHE_SUPPORTED) list(APPEND source_files InodeCache.cpp) @@ -79,5 +80,7 @@ target_link_libraries( target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +add_subdirectory(core) +add_subdirectory(storage) 
add_subdirectory(third_party) add_subdirectory(util) diff --git a/src/Context.cpp b/src/Context.cpp index cba865a53..b5a24e3ec 100644 --- a/src/Context.cpp +++ b/src/Context.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2021 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -32,7 +32,8 @@ using nonstd::string_view; Context::Context() : actual_cwd(Util::get_actual_cwd()), - apparent_cwd(Util::get_apparent_cwd(actual_cwd)) + apparent_cwd(Util::get_apparent_cwd(actual_cwd)), + storage(config) #ifdef INODE_CACHE_SUPPORTED , inode_cache(config) diff --git a/src/Context.hpp b/src/Context.hpp index 00df66ea4..b727cbb5b 100644 --- a/src/Context.hpp +++ b/src/Context.hpp @@ -34,6 +34,8 @@ # include "InodeCache.hpp" #endif +#include + #include "third_party/nonstd/optional.hpp" #include "third_party/nonstd/string_view.hpp" @@ -63,20 +65,6 @@ public: // The original argument list. Args orig_args; - // Name (represented as a hash) of the file containing the manifest for the - // cached result. - const nonstd::optional& manifest_name() const; - - // Full path to the file containing the manifest (cachedir/a/b/cdef[...]M), if - // any. - const nonstd::optional& manifest_path() const; - - // Name (represented as a hash) of the file containing the cached result. - const nonstd::optional& result_name() const; - - // Full path to the file containing the result (cachedir/a/b/cdef[...]R). - const nonstd::optional& result_path() const; - // Time of compilation. Used to see if include files have changed after // compilation. time_t time_of_compilation = 0; @@ -102,19 +90,14 @@ public: // Headers (or directories with headers) to ignore in manifest mode. std::vector ignore_header_paths; + // Storage (fronting primary and secondary storage backends). + storage::Storage storage; + #ifdef INODE_CACHE_SUPPORTED // InodeCache that caches source file hashes when enabled. 
mutable InodeCache inode_cache; #endif - // Statistics updates which get written into the statistics file belonging to - // the result. - Counters counter_updates; - - // Statistics updates which get written into the statistics file belonging to - // the manifest. - Counters manifest_counter_updates; - // PID of currently executing compiler that we have started, if any. 0 means // no ongoing compilation. pid_t compiler_pid = 0; @@ -135,21 +118,10 @@ public: std::unique_ptr mini_trace; #endif - void set_manifest_name(const Digest& name); - void set_manifest_path(const std::string& path); - void set_result_name(const Digest& name); - void set_result_path(const std::string& path); - // Register a temporary file to remove at program exit. void register_pending_tmp_file(const std::string& path); private: - nonstd::optional m_manifest_name; - nonstd::optional m_manifest_path; - - nonstd::optional m_result_name; - nonstd::optional m_result_path; - // Options to ignore for the hash. std::vector m_ignore_options; @@ -165,56 +137,8 @@ private: void unlink_pending_tmp_files_signal_safe(); // called from signal handler }; -inline const nonstd::optional& -Context::manifest_name() const -{ - return m_manifest_name; -} - -inline const nonstd::optional& -Context::manifest_path() const -{ - return m_manifest_path; -} - -inline const nonstd::optional& -Context::result_name() const -{ - return m_result_name; -} - -inline const nonstd::optional& -Context::result_path() const -{ - return m_result_path; -} - inline const std::vector& Context::ignore_options() const { return m_ignore_options; } - -inline void -Context::set_manifest_name(const Digest& name) -{ - m_manifest_name = name; -} - -inline void -Context::set_manifest_path(const std::string& path) -{ - m_manifest_path = path; -} - -inline void -Context::set_result_name(const Digest& name) -{ - m_result_name = name; -} - -inline void -Context::set_result_path(const std::string& path) -{ - m_result_path = path; -} diff --git 
a/src/Manifest.cpp b/src/Manifest.cpp index 5dc06fc24..0dce17b42 100644 --- a/src/Manifest.cpp +++ b/src/Manifest.cpp @@ -164,14 +164,14 @@ struct ResultEntry // Indexes to file_infos. std::vector file_info_indexes; - // Name of the result. - Digest name; + // Key of the result. + Digest key; }; bool operator==(const ResultEntry& lhs, const ResultEntry& rhs) { - return lhs.file_info_indexes == rhs.file_info_indexes && lhs.name == rhs.name; + return lhs.file_info_indexes == rhs.file_info_indexes && lhs.key == rhs.key; } struct ManifestData @@ -182,12 +182,12 @@ struct ManifestData // Information about referenced include files. std::vector file_infos; - // Result names plus references to include file infos. + // Result keys plus references to include file infos. std::vector results; bool add_result_entry( - const Digest& result_digest, + const Digest& result_key, const std::unordered_map& included_files, time_t time_of_compilation, bool save_timestamp) @@ -214,7 +214,7 @@ struct ManifestData save_timestamp)); } - ResultEntry entry{std::move(file_info_indexes), result_digest}; + ResultEntry entry{std::move(file_info_indexes), result_key}; if (std::find(results.begin(), results.end(), entry) == results.end()) { results.push_back(std::move(entry)); return true; @@ -343,7 +343,7 @@ read_manifest(const std::string& path, FILE* dump_stream = nullptr) reader.read(file_info_index); entry.file_info_indexes.push_back(file_info_index); } - reader.read(entry.name.bytes(), Digest::size()); + reader.read(entry.key.bytes(), Digest::size()); } reader.finalize(); @@ -397,7 +397,7 @@ write_manifest(const Config& config, for (auto index : result.file_info_indexes) { writer.write(index); } - writer.write(result.name.bytes(), Digest::size()); + writer.write(result.key.bytes(), Digest::size()); } writer.finalize(); @@ -494,17 +494,14 @@ const std::string k_file_suffix = "M"; const uint8_t k_magic[4] = {'c', 'C', 'm', 'F'}; const uint8_t k_version = 2; -// Try to get the result name from a 
manifest file. Returns nullopt on failure. +// Try to get the result key from a manifest file. Returns nullopt on failure. optional get(const Context& ctx, const std::string& path) { std::unique_ptr mf; try { mf = read_manifest(path); - if (mf) { - // Update modification timestamp to save files from LRU cleanup. - Util::update_mtime(path); - } else { + if (!mf) { LOG_RAW("No such manifest file"); return nullopt; } @@ -520,19 +517,19 @@ get(const Context& ctx, const std::string& path) for (uint32_t i = mf->results.size(); i > 0; i--) { if (verify_result( ctx, *mf, mf->results[i - 1], stated_files, hashed_files)) { - return mf->results[i - 1].name; + return mf->results[i - 1].key; } } return nullopt; } -// Put the result name into a manifest file given a set of included files. +// Put the result key into a manifest file given a set of included files. // Returns true on success, otherwise false. bool put(const Config& config, const std::string& path, - const Digest& result_name, + const Digest& result_key, const std::unordered_map& included_files, time_t time_of_compilation, @@ -579,7 +576,7 @@ put(const Config& config, } bool added = mf->add_result_entry( - result_name, included_files, time_of_compilation, save_timestamp); + result_key, included_files, time_of_compilation, save_timestamp); if (added) { try { @@ -631,7 +628,7 @@ dump(const std::string& path, FILE* stream) PRINT(stream, " {}", file_info_index); } PRINT_RAW(stream, "\n"); - PRINT(stream, " Name: {}\n", mf->results[i].name.to_string()); + PRINT(stream, " Key: {}\n", mf->results[i].key.to_string()); } return true; diff --git a/src/Manifest.hpp b/src/Manifest.hpp index d05c2294c..0d18c9bf7 100644 --- a/src/Manifest.hpp +++ b/src/Manifest.hpp @@ -40,7 +40,7 @@ extern const uint8_t k_version; nonstd::optional get(const Context& ctx, const std::string& path); bool put(const Config& config, const std::string& path, - const Digest& result_name, + const Digest& result_key, const std::unordered_map& 
included_files, time_t time_of_compilation, bool save_timestamp); diff --git a/src/Result.cpp b/src/Result.cpp index 7a6701409..4c0cdb83f 100644 --- a/src/Result.cpp +++ b/src/Result.cpp @@ -359,7 +359,7 @@ Writer::do_finalize() for (const auto& pair : m_entries_to_write) { const auto file_type = pair.first; const auto& path = pair.second; - LOG("Storing result {}", path); + LOG("Storing result file {}", path); const bool store_raw = should_store_raw_file(m_ctx.config, file_type); uint64_t file_size = Stat::stat(path, Stat::OnError::throw_error).size(); diff --git a/src/Util.hpp b/src/Util.hpp index 417718a2c..c4431a496 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -423,7 +423,7 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat) } // Split `input` into words at any of the characters listed in `separators`. -// These words are a view into `input`; empty words are omitted. `separators` +// These words are a view into `input`; empty words are omitted. `separators` // must neither be the empty string nor a nullptr. std::vector split_into_views(nonstd::string_view input, const char* separators); diff --git a/src/ccache.cpp b/src/ccache.cpp index 49a52ea46..7528a2503 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -54,6 +54,8 @@ #include "hashutil.hpp" #include "language.hpp" +#include + #include "third_party/fmt/core.h" #include "third_party/nonstd/optional.hpp" #include "third_party/nonstd/string_view.hpp" @@ -72,10 +74,12 @@ extern "C" { # include "Win32Util.hpp" #endif +// System headers #include #include #include #include +// End of system headers #ifndef MYNAME # define MYNAME "ccache" @@ -154,31 +158,6 @@ Options for scripting or debugging: See also the manual on . )"; -// How often (in seconds) to scan $CCACHE_DIR/tmp for left-over temporary -// files. -const int k_tempdir_cleanup_interval = 2 * 24 * 60 * 60; // 2 days - -// Maximum files per cache directory. 
This constant is somewhat arbitrarily -// chosen to be large enough to avoid unnecessary cache levels but small enough -// not to make esoteric file systems (with bad performance for large -// directories) too slow. It could be made configurable, but hopefully there -// will be no need to do that. -const uint64_t k_max_cache_files_per_directory = 2000; - -// Minimum number of cache levels ($CCACHE_DIR/1/2/stored_file). -const uint8_t k_min_cache_levels = 2; - -// Maximum number of cache levels ($CCACHE_DIR/1/2/3/stored_file). -// -// On a cache miss, (k_max_cache_levels - k_min_cache_levels + 1) cache lookups -// (i.e. stat system calls) will be performed for a cache entry. -// -// An assumption made here is that if a cache is so large that it holds more -// than 16^4 * k_max_cache_files_per_directory files then we can assume that the -// file system is sane enough to handle more than -// k_max_cache_files_per_directory. -const uint8_t k_max_cache_levels = 4; - // This is a string that identifies the current "version" of the hash sum // computed by ccache. If, for any reason, we want to force the hash sum to be // different for the same input in a new ccache version, we can just change @@ -249,34 +228,6 @@ add_prefix(const Context& ctx, Args& args, const std::string& prefix_command) } } -static void -clean_up_internal_tempdir(const Config& config) -{ - time_t now = time(nullptr); - auto dir_st = Stat::stat(config.cache_dir(), Stat::OnError::log); - if (!dir_st || dir_st.mtime() + k_tempdir_cleanup_interval >= now) { - // No cleanup needed. 
- return; - } - - Util::update_mtime(config.cache_dir()); - - const std::string& temp_dir = config.temporary_dir(); - if (!Stat::lstat(temp_dir)) { - return; - } - - Util::traverse(temp_dir, [now](const std::string& path, bool is_dir) { - if (is_dir) { - return; - } - auto st = Stat::lstat(path, Stat::OnError::log); - if (st && st.mtime() + k_tempdir_cleanup_interval < now) { - Util::unlink_tmp(path); - } - }); -} - static std::string prepare_debug_path(const std::string& debug_dir, const std::string& output_obj, @@ -735,7 +686,7 @@ process_preprocessed_file(Context& ctx, // Extract the used includes from the dependency file. Note that we cannot // distinguish system headers from other includes here. static optional -result_name_from_depfile(Context& ctx, Hash& hash) +result_key_from_depfile(Context& ctx, Hash& hash) { std::string file_content; try { @@ -826,96 +777,40 @@ do_execute(Context& ctx, return status; } -struct LookUpCacheFileResult -{ - std::string path; - Stat stat; - uint8_t level; -}; - -static LookUpCacheFileResult -look_up_cache_file(const std::string& cache_dir, - const Digest& name, - nonstd::string_view suffix) -{ - const auto name_string = FMT("{}{}", name.to_string(), suffix); - - for (uint8_t level = k_min_cache_levels; level <= k_max_cache_levels; - ++level) { - const auto path = Util::get_path_in_cache(cache_dir, level, name_string); - const auto stat = Stat::stat(path); - if (stat) { - return {path, stat, level}; - } - } - - const auto shallowest_path = - Util::get_path_in_cache(cache_dir, k_min_cache_levels, name_string); - return {shallowest_path, Stat(), k_min_cache_levels}; -} - // Create or update the manifest file. 
static void -update_manifest_file(Context& ctx) +update_manifest_file(Context& ctx, + const Digest& manifest_key, + const Digest& result_key) { - if (!ctx.config.direct_mode() || ctx.config.read_only() - || ctx.config.read_only_direct()) { + if (ctx.config.read_only() || ctx.config.read_only_direct()) { return; } - ASSERT(ctx.manifest_path()); - ASSERT(ctx.result_path()); - MTR_BEGIN("manifest", "manifest_put"); - const auto old_stat = Stat::stat(*ctx.manifest_path()); - // See comment in get_file_hash_index for why saving of timestamps is forced // for precompiled headers. const bool save_timestamp = (ctx.config.sloppiness() & SLOPPY_FILE_STAT_MATCHES) || ctx.args_info.output_is_precompiled_header; - LOG("Adding result name to {}", *ctx.manifest_path()); - if (!Manifest::put(ctx.config, - *ctx.manifest_path(), - *ctx.result_name(), - ctx.included_files, - ctx.time_of_compilation, - save_timestamp)) { - LOG("Failed to add result name to {}", *ctx.manifest_path()); - } else { - const auto new_stat = Stat::stat(*ctx.manifest_path(), Stat::OnError::log); - ctx.manifest_counter_updates.increment( - Statistic::cache_size_kibibyte, - Util::size_change_kibibyte(old_stat, new_stat)); - ctx.manifest_counter_updates.increment(Statistic::files_in_cache, - !old_stat && new_stat ? 
1 : 0); - } - MTR_END("manifest", "manifest_put"); -} + ctx.storage.put( + manifest_key, core::CacheEntryType::manifest, [&](const std::string& path) { + LOG("Adding result key to {}", path); + if (!Manifest::put(ctx.config, + path, + result_key, + ctx.included_files, + ctx.time_of_compilation, + save_timestamp)) { + LOG("Failed to add result key to {}", path); + return false; + } + return true; + }); -static void -create_cachedir_tag(const Context& ctx) -{ - constexpr char cachedir_tag[] = - "Signature: 8a477f597d28d172789f06886806bc55\n" - "# This file is a cache directory tag created by ccache.\n" - "# For information about cache directory tags, see:\n" - "#\thttp://www.brynosaurus.com/cachedir/\n"; - - const std::string path = FMT("{}/{}/CACHEDIR.TAG", - ctx.config.cache_dir(), - ctx.result_name()->to_string()[0]); - const auto stat = Stat::stat(path); - if (stat) { - return; - } - try { - Util::write_file(path, cachedir_tag); - } catch (const Error& e) { - LOG("Failed to create {}: {}", path, e.what()); - } + MTR_END("manifest", "manifest_put"); } struct FindCoverageFileResult @@ -956,10 +851,68 @@ find_coverage_file(const Context& ctx) return {true, found_file, found_file == mangled_form}; } -// Run the real compiler and put the result in cache. 
static void +write_result(Context& ctx, + const std::string& result_path, + const Stat& obj_stat, + const std::string& stderr_path) +{ + Result::Writer result_writer(ctx, result_path); + + const auto stderr_stat = Stat::stat(stderr_path, Stat::OnError::log); + if (!stderr_stat) { + throw Failure(Statistic::internal_error); + } + + if (stderr_stat.size() > 0) { + result_writer.write(Result::FileType::stderr_output, stderr_path); + } + if (obj_stat) { + result_writer.write(Result::FileType::object, ctx.args_info.output_obj); + } + if (ctx.args_info.generating_dependencies) { + result_writer.write(Result::FileType::dependency, ctx.args_info.output_dep); + } + if (ctx.args_info.generating_coverage) { + const auto coverage_file = find_coverage_file(ctx); + if (!coverage_file.found) { + throw Failure(Statistic::internal_error); + } + result_writer.write(coverage_file.mangled + ? Result::FileType::coverage_mangled + : Result::FileType::coverage_unmangled, + coverage_file.path); + } + if (ctx.args_info.generating_stackusage) { + result_writer.write(Result::FileType::stackusage, ctx.args_info.output_su); + } + if (ctx.args_info.generating_diagnostics) { + result_writer.write(Result::FileType::diagnostic, ctx.args_info.output_dia); + } + if (ctx.args_info.seen_split_dwarf && Stat::stat(ctx.args_info.output_dwo)) { + // Only store .dwo file if it was created by the compiler (GCC and Clang + // behave differently e.g. for "-gsplit-dwarf -g1"). 
+ result_writer.write(Result::FileType::dwarf_object, + ctx.args_info.output_dwo); + } + + const auto file_size_and_count_diff = result_writer.finalize(); + if (file_size_and_count_diff) { + ctx.storage.primary().increment_statistic( + Statistic::cache_size_kibibyte, file_size_and_count_diff->size_kibibyte); + ctx.storage.primary().increment_statistic(Statistic::files_in_cache, + file_size_and_count_diff->count); + } else { + LOG("Error: {}", file_size_and_count_diff.error()); + throw Failure(Statistic::internal_error); + } +} + +// Run the real compiler and put the result in cache. Returns the result key. +static Digest to_cache(Context& ctx, Args& args, + nonstd::optional result_key, const Args& depend_extra_args, Hash* depend_mode_hash) { @@ -1065,13 +1018,14 @@ to_cache(Context& ctx, if (ctx.config.depend_mode()) { ASSERT(depend_mode_hash); - auto result_name = result_name_from_depfile(ctx, *depend_mode_hash); - if (!result_name) { + result_key = result_key_from_depfile(ctx, *depend_mode_hash); + if (!result_key) { throw Failure(Statistic::internal_error); } - ctx.set_result_name(*result_name); } + ASSERT(result_key); + bool produce_dep_file = ctx.args_info.generating_dependencies && ctx.args_info.output_dep != "/dev/null"; @@ -1092,86 +1046,29 @@ to_cache(Context& ctx, throw Failure(Statistic::compiler_produced_empty_output); } - const auto stderr_stat = Stat::stat(tmp_stderr_path, Stat::OnError::log); - if (!stderr_stat) { - throw Failure(Statistic::internal_error); - } - - MTR_BEGIN("file", "file_put"); - - const auto result_file = look_up_cache_file( - ctx.config.cache_dir(), *ctx.result_name(), Result::k_file_suffix); - ctx.set_result_path(result_file.path); - Result::Writer result_writer(ctx, result_file.path); - - if (stderr_stat.size() > 0) { - result_writer.write(Result::FileType::stderr_output, tmp_stderr_path); - } - if (obj_stat) { - result_writer.write(Result::FileType::object, ctx.args_info.output_obj); - } - if 
(ctx.args_info.generating_dependencies) { - result_writer.write(Result::FileType::dependency, ctx.args_info.output_dep); - } - if (ctx.args_info.generating_coverage) { - const auto coverage_file = find_coverage_file(ctx); - if (!coverage_file.found) { - throw Failure(Statistic::internal_error); - } - result_writer.write(coverage_file.mangled - ? Result::FileType::coverage_mangled - : Result::FileType::coverage_unmangled, - coverage_file.path); - } - if (ctx.args_info.generating_stackusage) { - result_writer.write(Result::FileType::stackusage, ctx.args_info.output_su); - } - if (ctx.args_info.generating_diagnostics) { - result_writer.write(Result::FileType::diagnostic, ctx.args_info.output_dia); - } - if (ctx.args_info.seen_split_dwarf && Stat::stat(ctx.args_info.output_dwo)) { - // Only store .dwo file if it was created by the compiler (GCC and Clang - // behave differently e.g. for "-gsplit-dwarf -g1"). - result_writer.write(Result::FileType::dwarf_object, - ctx.args_info.output_dwo); - } - - const auto file_size_and_count_diff = result_writer.finalize(); - if (file_size_and_count_diff) { - LOG("Stored in cache: {}", result_file.path); - ctx.counter_updates.increment(Statistic::cache_size_kibibyte, - file_size_and_count_diff->size_kibibyte); - ctx.counter_updates.increment(Statistic::files_in_cache, - file_size_and_count_diff->count); - } else { - LOG("Error: {}", file_size_and_count_diff.error()); - } - - auto new_result_stat = Stat::stat(result_file.path, Stat::OnError::log); - if (!new_result_stat) { + MTR_BEGIN("result", "result_put"); + try { + ctx.storage.put( + *result_key, core::CacheEntryType::result, [&](const std::string& path) { + write_result(ctx, path, obj_stat, tmp_stderr_path); + return true; + }); + } catch (const Error& e) { + LOG("Error: {}", e.what()); throw Failure(Statistic::internal_error); } - ctx.counter_updates.increment( - Statistic::cache_size_kibibyte, - Util::size_change_kibibyte(result_file.stat, new_result_stat)); - 
ctx.counter_updates.increment(Statistic::files_in_cache, - result_file.stat ? 0 : 1); - - MTR_END("file", "file_put"); - - // Make sure we have a CACHEDIR.TAG in the cache part of cache_dir. This can - // be done almost anywhere, but we might as well do it near the end as we save - // the stat call if we exit early. - create_cachedir_tag(ctx); + MTR_END("result", "result_put"); // Everything OK. Util::send_to_stderr(ctx, Util::read_file(tmp_stderr_path)); + + return *result_key; } -// Find the result name by running the compiler in preprocessor mode and +// Find the result key by running the compiler in preprocessor mode and // hashing the result. static Digest -get_result_name_from_cpp(Context& ctx, Args& args, Hash& hash) +get_result_key_from_cpp(Context& ctx, Args& args, Hash& hash) { ctx.time_of_compilation = time(nullptr); @@ -1547,14 +1444,14 @@ option_should_be_ignored(const std::string& arg, } // Update a hash sum with information specific to the direct and preprocessor -// modes and calculate the result name. Returns the result name on success, -// otherwise nullopt. -static optional -calculate_result_name(Context& ctx, - const Args& args, - Args& preprocessor_args, - Hash& hash, - bool direct_mode) +// modes and calculate the result key. Returns the result key on success, and +// if direct_mode is true also the manifest key. +static std::pair, nonstd::optional> +calculate_result_and_manifest_key(Context& ctx, + const Args& args, + Args& preprocessor_args, + Hash& hash, + bool direct_mode) { bool found_ccbin = false; @@ -1773,7 +1670,9 @@ calculate_result_name(Context& ctx, hash.hash(arch); } - optional result_name; + nonstd::optional result_key; + nonstd::optional manifest_key; + if (direct_mode) { // Hash environment variables that affect the preprocessor output. 
const char* envvars[] = {"CPATH", @@ -1812,42 +1711,38 @@ calculate_result_name(Context& ctx, if (result & HASH_SOURCE_CODE_FOUND_TIME) { LOG_RAW("Disabling direct mode"); ctx.config.set_direct_mode(false); - return nullopt; + return {nullopt, nullopt}; } - const auto manifest_name = hash.digest(); - ctx.set_manifest_name(manifest_name); + manifest_key = hash.digest(); - const auto manifest_file = look_up_cache_file( - ctx.config.cache_dir(), manifest_name, Manifest::k_file_suffix); - ctx.set_manifest_path(manifest_file.path); + const auto manifest_path = + ctx.storage.get(*manifest_key, core::CacheEntryType::manifest); - if (manifest_file.stat) { - LOG("Looking for result name in {}", manifest_file.path); + if (manifest_path) { + LOG("Looking for result key in {}", *manifest_path); MTR_BEGIN("manifest", "manifest_get"); - result_name = Manifest::get(ctx, manifest_file.path); + result_key = Manifest::get(ctx, *manifest_path); MTR_END("manifest", "manifest_get"); - if (result_name) { - LOG_RAW("Got result name from manifest"); + if (result_key) { + LOG_RAW("Got result key from manifest"); } else { - LOG_RAW("Did not find result name in manifest"); + LOG_RAW("Did not find result key in manifest"); } - } else { - LOG("No manifest with name {} in the cache", manifest_name.to_string()); } } else { if (ctx.args_info.arch_args.empty()) { - result_name = get_result_name_from_cpp(ctx, preprocessor_args, hash); - LOG_RAW("Got result name from preprocessor"); + result_key = get_result_key_from_cpp(ctx, preprocessor_args, hash); + LOG_RAW("Got result key from preprocessor"); } else { preprocessor_args.push_back("-arch"); for (size_t i = 0; i < ctx.args_info.arch_args.size(); ++i) { preprocessor_args.push_back(ctx.args_info.arch_args[i]); - result_name = get_result_name_from_cpp(ctx, preprocessor_args, hash); - LOG("Got result name from preprocessor with -arch {}", + result_key = get_result_key_from_cpp(ctx, preprocessor_args, hash); + LOG("Got result key from preprocessor with 
-arch {}", ctx.args_info.arch_args[i]); if (i != ctx.args_info.arch_args.size() - 1) { - result_name = nullopt; + result_key = nullopt; } preprocessor_args.pop_back(); } @@ -1855,14 +1750,14 @@ calculate_result_name(Context& ctx, } } - return result_name; + return {result_key, manifest_key}; } enum class FromCacheCallMode { direct, cpp }; // Try to return the compile result from cache. static optional -from_cache(Context& ctx, FromCacheCallMode mode) +from_cache(Context& ctx, FromCacheCallMode mode, const Digest& result_key) { UmaskScope umask_scope(ctx.original_umask); @@ -1889,14 +1784,13 @@ from_cache(Context& ctx, FromCacheCallMode mode) MTR_BEGIN("cache", "from_cache"); // Get result from cache. - const auto result_file = look_up_cache_file( - ctx.config.cache_dir(), *ctx.result_name(), Result::k_file_suffix); - if (!result_file.stat) { - LOG("No result with name {} in the cache", ctx.result_name()->to_string()); + const auto result_path = + ctx.storage.get(result_key, core::CacheEntryType::result); + if (!result_path) { return nullopt; } - ctx.set_result_path(result_file.path); - Result::Reader result_reader(result_file.path); + + Result::Reader result_reader(*result_path); ResultRetriever result_retriever( ctx, should_rewrite_dependency_target(ctx.args_info)); @@ -1907,9 +1801,6 @@ from_cache(Context& ctx, FromCacheCallMode mode) return nullopt; } - // Update modification timestamp to save file from LRU cleanup. - Util::update_mtime(*ctx.result_path()); - LOG_RAW("Succeeded getting cached result"); return mode == FromCacheCallMode::direct ? 
Statistic::direct_cache_hit @@ -1967,6 +1858,7 @@ static void initialize(Context& ctx, int argc, const char* const* argv) { ctx.orig_args = Args::from_argv(argc, argv); + ctx.storage.initialize(); LOG("=== CCACHE {} STARTED =========================================", CCACHE_VERSION); @@ -2014,171 +1906,32 @@ configuration_printer(const std::string& key, static int cache_compilation(int argc, const char* const* argv); static Statistic do_cache_compilation(Context& ctx, const char* const* argv); -static uint8_t -calculate_wanted_cache_level(uint64_t files_in_level_1) +static void +finalize_at_exit(Context& ctx) { - uint64_t files_per_directory = files_in_level_1 / 16; - for (uint8_t i = k_min_cache_levels; i <= k_max_cache_levels; ++i) { - if (files_per_directory < k_max_cache_files_per_directory) { - return i; + try { + if (ctx.config.disable()) { + // Just log result, don't update statistics. + LOG_RAW("Result: disabled"); + return; } - files_per_directory /= 16; - } - return k_max_cache_levels; -} - -static optional -update_stats_and_maybe_move_cache_file(const Context& ctx, - const Digest& name, - const std::string& current_path, - const Counters& counter_updates, - const std::string& file_suffix) -{ - if (counter_updates.all_zero()) { - return nullopt; - } - - // Use stats file in the level one subdirectory for cache bookkeeping counters - // since cleanup is performed on level one. Use stats file in the level two - // subdirectory for other counters to reduce lock contention. 
- const bool use_stats_on_level_1 = - counter_updates.get(Statistic::cache_size_kibibyte) != 0 - || counter_updates.get(Statistic::files_in_cache) != 0; - std::string level_string = FMT("{:x}", name.bytes()[0] >> 4); - if (!use_stats_on_level_1) { - level_string += FMT("/{:x}", name.bytes()[0] & 0xF); - } - const auto stats_file = - FMT("{}/{}/stats", ctx.config.cache_dir(), level_string); - auto counters = Statistics::update(stats_file, [&counter_updates](auto& cs) { - cs.increment(counter_updates); - }); - if (!counters) { - return nullopt; - } - - if (use_stats_on_level_1) { - // Only consider moving the cache file to another level when we have read - // the level 1 stats file since it's only then we know the proper - // files_in_cache value. - const auto wanted_level = - calculate_wanted_cache_level(counters->get(Statistic::files_in_cache)); - const auto wanted_path = Util::get_path_in_cache( - ctx.config.cache_dir(), wanted_level, name.to_string() + file_suffix); - if (current_path != wanted_path) { - Util::ensure_dir_exists(Util::dir_name(wanted_path)); - LOG("Moving {} to {}", current_path, wanted_path); - try { - Util::rename(current_path, wanted_path); - } catch (const Error&) { - // Two ccache processes may move the file at the same time, so failure - // to rename is OK. + if (!ctx.config.log_file().empty() || ctx.config.debug()) { + const auto result = ctx.storage.primary().get_result_message(); + if (result) { + LOG("Result: {}", *result); } } - } - return counters; -} - -static void -finalize_stats_and_trigger_cleanup(Context& ctx) -{ - const auto& config = ctx.config; - - if (config.disable()) { - // Just log result, don't update statistics. 
- LOG_RAW("Result: disabled"); - return; - } - - if (!config.log_file().empty() || config.debug()) { - const auto result = Statistics::get_result_message(ctx.counter_updates); - if (result) { - LOG("Result: {}", *result); - } - } - if (!config.stats_log().empty()) { - const auto result_id = Statistics::get_result_id(ctx.counter_updates); - if (result_id) { - Statistics::log_result( - config.stats_log(), ctx.args_info.input_file, *result_id); + if (!ctx.config.stats_log().empty()) { + const auto result_id = ctx.storage.primary().get_result_id(); + if (result_id) { + Statistics::log_result( + ctx.config.stats_log(), ctx.args_info.input_file, *result_id); + } } - } - if (!config.stats()) { - return; - } - - if (!ctx.result_path()) { - ASSERT(ctx.counter_updates.get(Statistic::cache_size_kibibyte) == 0); - ASSERT(ctx.counter_updates.get(Statistic::files_in_cache) == 0); - - // Context::set_result_path hasn't been called yet, so we just choose one of - // the stats files in the 256 level 2 directories. 
- const auto bucket = getpid() % 256; - const auto stats_file = - FMT("{}/{:x}/{:x}/stats", config.cache_dir(), bucket / 16, bucket % 16); - Statistics::update(stats_file, - [&ctx](auto& cs) { cs.increment(ctx.counter_updates); }); - return; - } - - if (ctx.manifest_path()) { - update_stats_and_maybe_move_cache_file(ctx, - *ctx.manifest_name(), - *ctx.manifest_path(), - ctx.manifest_counter_updates, - Manifest::k_file_suffix); - } - - const auto counters = - update_stats_and_maybe_move_cache_file(ctx, - *ctx.result_name(), - *ctx.result_path(), - ctx.counter_updates, - Result::k_file_suffix); - if (!counters) { - return; - } - - const auto subdir = - FMT("{}/{:x}", config.cache_dir(), ctx.result_name()->bytes()[0] >> 4); - bool need_cleanup = false; - - if (config.max_files() != 0 - && counters->get(Statistic::files_in_cache) > config.max_files() / 16) { - LOG("Need to clean up {} since it holds {} files (limit: {} files)", - subdir, - counters->get(Statistic::files_in_cache), - config.max_files() / 16); - need_cleanup = true; - } - if (config.max_size() != 0 - && counters->get(Statistic::cache_size_kibibyte) - > config.max_size() / 1024 / 16) { - LOG("Need to clean up {} since it holds {} KiB (limit: {} KiB)", - subdir, - counters->get(Statistic::cache_size_kibibyte), - config.max_size() / 1024 / 16); - need_cleanup = true; - } - - if (need_cleanup) { - const double factor = config.limit_multiple() / 16; - const uint64_t max_size = round(config.max_size() * factor); - const uint32_t max_files = round(config.max_files() * factor); - const time_t max_age = 0; - clean_up_dir( - subdir, max_size, max_files, max_age, [](double /*progress*/) {}); - } -} - -static void -finalize_at_exit(Context& ctx) -{ - try { - finalize_stats_and_trigger_cleanup(ctx); + ctx.storage.finalize(); } catch (const ErrorBase& e) { // finalize_at_exit must not throw since it's called by a destructor. 
LOG("Error while finalizing stats: {}", e.what()); @@ -2215,10 +1968,10 @@ cache_compilation(int argc, const char* const* argv) try { Statistic statistic = do_cache_compilation(ctx, argv); - ctx.counter_updates.increment(statistic); + ctx.storage.primary().increment_statistic(statistic); } catch (const Failure& e) { if (e.statistic() != Statistic::none) { - ctx.counter_updates.increment(e.statistic()); + ctx.storage.primary().increment_statistic(e.statistic()); } if (e.exit_code()) { @@ -2266,18 +2019,13 @@ do_cache_compilation(Context& ctx, const char* const* argv) throw Failure(Statistic::internal_error); } - MTR_BEGIN("main", "clean_up_internal_tempdir"); - if (ctx.config.temporary_dir() == ctx.config.cache_dir() + "/tmp") { - clean_up_internal_tempdir(ctx.config); - } - MTR_END("main", "clean_up_internal_tempdir"); - if (!ctx.config.log_file().empty() || ctx.config.debug()) { ctx.config.visit_items(configuration_logger); } // Guess compiler after logging the config value in order to be able to - // display "compiler_type = auto" before overwriting the value with the guess. + // display "compiler_type = auto" before overwriting the value with the + // guess. 
if (ctx.config.compiler_type() == CompilerType::auto_guess) { ctx.config.set_compiler_type(guess_compiler(ctx.orig_args[0])); } @@ -2367,20 +2115,20 @@ do_cache_compilation(Context& ctx, const char* const* argv) args_to_hash.push_back(processed.extra_args_to_hash); bool put_result_in_manifest = false; - optional result_name; - optional result_name_from_manifest; + optional result_key; + optional result_key_from_manifest; + optional manifest_key; + if (ctx.config.direct_mode()) { LOG_RAW("Trying direct lookup"); MTR_BEGIN("hash", "direct_hash"); Args dummy_args; - result_name = - calculate_result_name(ctx, args_to_hash, dummy_args, direct_hash, true); + std::tie(result_key, manifest_key) = calculate_result_and_manifest_key( + ctx, args_to_hash, dummy_args, direct_hash, true); MTR_END("hash", "direct_hash"); - if (result_name) { - ctx.set_result_name(*result_name); - + if (result_key) { // If we can return from cache at this point then do so. - auto result = from_cache(ctx, FromCacheCallMode::direct); + auto result = from_cache(ctx, FromCacheCallMode::direct, *result_key); if (result) { return *result; } @@ -2389,7 +2137,7 @@ do_cache_compilation(Context& ctx, const char* const* argv) // was already found in manifest, so don't re-add it later. put_result_in_manifest = false; - result_name_from_manifest = result_name; + result_key_from_manifest = result_key; } else { // Add result to manifest later. 
put_result_in_manifest = true; @@ -2408,21 +2156,17 @@ do_cache_compilation(Context& ctx, const char* const* argv) init_hash_debug(ctx, cpp_hash, 'p', "PREPROCESSOR MODE", debug_text_file); MTR_BEGIN("hash", "cpp_hash"); - result_name = calculate_result_name( - ctx, args_to_hash, processed.preprocessor_args, cpp_hash, false); + result_key = + calculate_result_and_manifest_key( + ctx, args_to_hash, processed.preprocessor_args, cpp_hash, false) + .first; MTR_END("hash", "cpp_hash"); - // calculate_result_name does not return nullopt if the last (direct_mode) - // argument is false. - ASSERT(result_name); - ctx.set_result_name(*result_name); - - if (result_name_from_manifest && result_name_from_manifest != result_name) { - // manifest_path is guaranteed to be set when calculate_result_name - // returns a non-nullopt result in direct mode, i.e. when - // result_name_from_manifest is set. - ASSERT(ctx.manifest_path()); + // calculate_result_and_manifest_key always returns a non-nullopt result_key + // if the last argument (direct_mode) is false. + ASSERT(result_key); + if (result_key_from_manifest && result_key_from_manifest != result_key) { // The hash from manifest differs from the hash of the preprocessor // output. This could be because: // @@ -2438,16 +2182,16 @@ do_cache_compilation(Context& ctx, const char* const* argv) LOG_RAW("Hash from manifest doesn't match preprocessor output"); LOG_RAW("Likely reason: different CCACHE_BASEDIRs used"); LOG_RAW("Removing manifest as a safety measure"); - Util::unlink_safe(*ctx.manifest_path()); + ctx.storage.remove(*result_key, core::CacheEntryType::result); put_result_in_manifest = true; } // If we can return from cache at this point then do. 
- auto result = from_cache(ctx, FromCacheCallMode::cpp); + const auto result = from_cache(ctx, FromCacheCallMode::cpp, *result_key); if (result) { - if (put_result_in_manifest) { - update_manifest_file(ctx); + if (manifest_key && put_result_in_manifest) { + update_manifest_file(ctx, *manifest_key, *result_key); } return *result; } @@ -2465,11 +2209,15 @@ do_cache_compilation(Context& ctx, const char* const* argv) // Run real compiler, sending output to cache. MTR_BEGIN("cache", "to_cache"); - to_cache(ctx, - processed.compiler_args, - ctx.args_info.depend_extra_args, - depend_mode_hash); - update_manifest_file(ctx); + result_key = to_cache(ctx, + processed.compiler_args, + result_key, + ctx.args_info.depend_extra_args, + depend_mode_hash); + if (ctx.config.direct_mode()) { + ASSERT(manifest_key); + update_manifest_file(ctx, *manifest_key, *result_key); + } MTR_END("cache", "to_cache"); return Statistic::cache_miss; @@ -2754,8 +2502,8 @@ ccache_main(int argc, const char* const* argv) PRINT(stderr, USAGE_TEXT, CCACHE_NAME, CCACHE_NAME); exit(EXIT_FAILURE); } - // If the first argument isn't an option, then assume we are being passed - // a compiler name and options. + // If the first argument isn't an option, then assume we are being + // passed a compiler name and options. if (argv[1][0] == '-') { return handle_main_options(argc, argv); } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 000000000..169216b4d --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,6 @@ +# set( +# sources +# ${CMAKE_CURRENT_SOURCE_DIR}/file.cpp +# ) + +# target_sources(ccache_lib PRIVATE ${sources}) diff --git a/src/core/types.hpp b/src/core/types.hpp new file mode 100644 index 000000000..42179c1c0 --- /dev/null +++ b/src/core/types.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +// System headers +#include +// End of system headers + +namespace core { + +enum class CacheEntryType { result, manifest }; + +} // namespace core diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 000000000..70694d030 --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,8 @@ +add_subdirectory(primary) + +set( + sources + ${CMAKE_CURRENT_SOURCE_DIR}/Storage.cpp +) + +target_sources(ccache_lib PRIVATE ${sources}) diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp new file mode 100644 index 000000000..14d555e57 --- /dev/null +++ b/src/storage/Storage.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "Storage.hpp" + +namespace storage { + +Storage::Storage(const Config& config) : m_primary_storage(config) +{ +} + +void +Storage::initialize() +{ + m_primary_storage.initialize(); +} + +void +Storage::finalize() +{ + m_primary_storage.finalize(); +} + +primary::PrimaryStorage& +Storage::primary() +{ + return m_primary_storage; +} + +nonstd::optional +Storage::get(const Digest& key, const core::CacheEntryType type) +{ + return m_primary_storage.get(key, type); +} + +bool +Storage::put(const Digest& key, + const core::CacheEntryType type, + const storage::CacheEntryWriter& entry_writer) +{ + return m_primary_storage.put(key, type, entry_writer).has_value(); +} + +void +Storage::remove(const Digest& key, const core::CacheEntryType type) +{ + m_primary_storage.remove(key, type); +} + +} // namespace storage diff --git a/src/storage/Storage.hpp b/src/storage/Storage.hpp new file mode 100644 index 000000000..f6af9de39 --- /dev/null +++ b/src/storage/Storage.hpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include "types.hpp" + +#include +#include + +#include + +// System headers +#include +#include +// End of system headers + +class Digest; + +namespace storage { + +class Storage +{ +public: + Storage(const Config& config); + + void initialize(); + void finalize(); + + primary::PrimaryStorage& primary(); + + // Returns a path to a file containing the value. + nonstd::optional get(const Digest& key, + core::CacheEntryType type); + + bool put(const Digest& key, + core::CacheEntryType type, + const storage::CacheEntryWriter& entry_writer); + + void remove(const Digest& key, core::CacheEntryType type); + +private: + primary::PrimaryStorage m_primary_storage; +}; + +} // namespace storage diff --git a/src/storage/primary/CMakeLists.txt b/src/storage/primary/CMakeLists.txt new file mode 100644 index 000000000..f74323c6e --- /dev/null +++ b/src/storage/primary/CMakeLists.txt @@ -0,0 +1,6 @@ +set( + sources + ${CMAKE_CURRENT_SOURCE_DIR}/PrimaryStorage.cpp +) + +target_sources(ccache_lib PRIVATE ${sources}) diff --git a/src/storage/primary/PrimaryStorage.cpp b/src/storage/primary/PrimaryStorage.cpp new file mode 100644 index 000000000..df8d7b6aa --- /dev/null +++ b/src/storage/primary/PrimaryStorage.cpp @@ -0,0 +1,386 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "PrimaryStorage.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace storage { +namespace primary { + +// How often (in seconds) to scan $CCACHE_DIR/tmp for left-over temporary +// files. +const int k_tempdir_cleanup_interval = 2 * 24 * 60 * 60; // 2 days + +// Maximum files per cache directory. This constant is somewhat arbitrarily +// chosen to be large enough to avoid unnecessary cache levels but small enough +// not to make esoteric file systems (with bad performance for large +// directories) too slow. It could be made configurable, but hopefully there +// will be no need to do that. +const uint64_t k_max_cache_files_per_directory = 2000; + +// Minimum number of cache levels ($CCACHE_DIR/1/2/stored_file). +const uint8_t k_min_cache_levels = 2; + +// Maximum number of cache levels ($CCACHE_DIR/1/2/3/stored_file). +// +// On a cache miss, (k_max_cache_levels - k_min_cache_levels + 1) cache lookups +// (i.e. stat system calls) will be performed for a cache entry. +// +// An assumption made here is that if a cache is so large that it holds more +// than 16^4 * k_max_cache_files_per_directory files then we can assume that the +// file system is sane enough to handle more than +// k_max_cache_files_per_directory. 
+const uint8_t k_max_cache_levels = 4; + +static std::string +suffix_from_type(const core::CacheEntryType type) +{ + switch (type) { + case core::CacheEntryType::manifest: + return "M"; + + case core::CacheEntryType::result: + return "R"; + } + + ASSERT(false); +} + +static uint8_t +calculate_wanted_cache_level(const uint64_t files_in_level_1) +{ + uint64_t files_per_directory = files_in_level_1 / 16; + for (uint8_t i = k_min_cache_levels; i <= k_max_cache_levels; ++i) { + if (files_per_directory < k_max_cache_files_per_directory) { + return i; + } + files_per_directory /= 16; + } + return k_max_cache_levels; +} + +PrimaryStorage::PrimaryStorage(const Config& config) : m_config(config) +{ +} + +void +PrimaryStorage::initialize() +{ + MTR_BEGIN("primary_storage", "clean_up_internal_tempdir"); + if (m_config.temporary_dir() == m_config.cache_dir() + "/tmp") { + clean_up_internal_tempdir(); + } + MTR_END("primary_storage", "clean_up_internal_tempdir"); +} + +void +PrimaryStorage::finalize() +{ + if (!m_config.stats()) { + return; + } + + if (m_manifest_key) { + // A manifest entry was written. + ASSERT(!m_manifest_path.empty()); + update_stats_and_maybe_move_cache_file(*m_manifest_key, + m_manifest_path, + m_manifest_counter_updates, + core::CacheEntryType::manifest); + } + + if (!m_result_key) { + // No result entry was written, so we just choose one of the stats files in + // the 256 level 2 directories. 
+ + ASSERT(m_result_counter_updates.get(Statistic::cache_size_kibibyte) == 0); + ASSERT(m_result_counter_updates.get(Statistic::files_in_cache) == 0); + + const auto bucket = getpid() % 256; + const auto stats_file = + FMT("{}/{:x}/{:x}/stats", m_config.cache_dir(), bucket / 16, bucket % 16); + Statistics::update( + stats_file, [&](auto& cs) { cs.increment(m_result_counter_updates); }); + return; + } + + ASSERT(!m_result_path.empty()); + + const auto counters = + update_stats_and_maybe_move_cache_file(*m_result_key, + m_result_path, + m_result_counter_updates, + core::CacheEntryType::result); + if (!counters) { + return; + } + + const auto subdir = + FMT("{}/{:x}", m_config.cache_dir(), m_result_key->bytes()[0] >> 4); + bool need_cleanup = false; + + if (m_config.max_files() != 0 + && counters->get(Statistic::files_in_cache) > m_config.max_files() / 16) { + LOG("Need to clean up {} since it holds {} files (limit: {} files)", + subdir, + counters->get(Statistic::files_in_cache), + m_config.max_files() / 16); + need_cleanup = true; + } + if (m_config.max_size() != 0 + && counters->get(Statistic::cache_size_kibibyte) + > m_config.max_size() / 1024 / 16) { + LOG("Need to clean up {} since it holds {} KiB (limit: {} KiB)", + subdir, + counters->get(Statistic::cache_size_kibibyte), + m_config.max_size() / 1024 / 16); + need_cleanup = true; + } + + if (need_cleanup) { + const double factor = m_config.limit_multiple() / 16; + const uint64_t max_size = round(m_config.max_size() * factor); + const uint32_t max_files = round(m_config.max_files() * factor); + const time_t max_age = 0; + clean_up_dir( + subdir, max_size, max_files, max_age, [](double /*progress*/) {}); + } +} + +nonstd::optional +PrimaryStorage::get(const Digest& key, const core::CacheEntryType type) const +{ + const auto cache_file = look_up_cache_file(key, type); + if (!cache_file.stat) { + LOG("No {} in primary storage", key.to_string()); + return nonstd::nullopt; + } + + LOG( + "Retrieved {} from primary 
storage ({})", key.to_string(), cache_file.path); + + // Update modification timestamp to save file from LRU cleanup. + Util::update_mtime(cache_file.path); + return cache_file.path; +} + +nonstd::optional +PrimaryStorage::put(const Digest& key, + const core::CacheEntryType type, + const storage::CacheEntryWriter& entry_writer) +{ + const auto cache_file = look_up_cache_file(key, type); + switch (type) { + case core::CacheEntryType::manifest: + m_manifest_key = key; + m_manifest_path = cache_file.path; + break; + + case core::CacheEntryType::result: + m_result_key = key; + m_result_path = cache_file.path; + break; + } + + if (!entry_writer(cache_file.path)) { + LOG("Did not store {} in primary storage", key.to_string()); + return nonstd::nullopt; + } + + const auto new_stat = Stat::stat(cache_file.path, Stat::OnError::log); + if (!new_stat) { + LOG("Failed to stat {}: {}", cache_file.path, strerror(errno)); + return nonstd::nullopt; + } + + LOG("Stored {} in primary storage ({})", key.to_string(), cache_file.path); + + auto& counter_updates = (type == core::CacheEntryType::manifest) + ? m_manifest_counter_updates + : m_result_counter_updates; + counter_updates.increment( + Statistic::cache_size_kibibyte, + Util::size_change_kibibyte(cache_file.stat, new_stat)); + counter_updates.increment(Statistic::files_in_cache, cache_file.stat ? 0 : 1); + + // Make sure we have a CACHEDIR.TAG in the cache part of cache_dir. This can + // be done almost anywhere, but we might as well do it near the end as we save + // the stat call if we exit early. 
+ util::create_cachedir_tag( + FMT("{}/{}", m_config.cache_dir(), key.to_string()[0])); + + return cache_file.path; +} + +void +PrimaryStorage::remove(const Digest& key, const core::CacheEntryType type) +{ + const auto cache_file = look_up_cache_file(key, type); + if (cache_file.stat) { + Util::unlink_safe(cache_file.path); + LOG( + "Removed {} from primary storage ({})", key.to_string(), cache_file.path); + } else { + LOG("No {} to remove from primary storage", key.to_string()); + } +} + +void +PrimaryStorage::increment_statistic(const Statistic statistic, + const int64_t value) +{ + m_result_counter_updates.increment(statistic, value); +} + +// Return a machine-readable string representing the final ccache result, or +// nullopt if there was no result. +nonstd::optional +PrimaryStorage::get_result_id() const +{ + return Statistics::get_result_id(m_result_counter_updates); +} + +// Return a human-readable string representing the final ccache result, or +// nullopt if there was no result. 
+nonstd::optional +PrimaryStorage::get_result_message() const +{ + return Statistics::get_result_message(m_result_counter_updates); +} + +// Private methods + +PrimaryStorage::LookUpCacheFileResult +PrimaryStorage::look_up_cache_file(const Digest& key, + const core::CacheEntryType type) const +{ + const auto key_string = FMT("{}{}", key.to_string(), suffix_from_type(type)); + + for (uint8_t level = k_min_cache_levels; level <= k_max_cache_levels; + ++level) { + const auto path = + Util::get_path_in_cache(m_config.cache_dir(), level, key_string); + const auto stat = Stat::stat(path); + if (stat) { + return {path, stat, level}; + } + } + + const auto shallowest_path = Util::get_path_in_cache( + m_config.cache_dir(), k_min_cache_levels, key_string); + return {shallowest_path, Stat(), k_min_cache_levels}; +} + +void +PrimaryStorage::clean_up_internal_tempdir() +{ + const time_t now = time(nullptr); + const auto dir_st = Stat::stat(m_config.cache_dir(), Stat::OnError::log); + if (!dir_st || dir_st.mtime() + k_tempdir_cleanup_interval >= now) { + // No cleanup needed. + return; + } + + Util::update_mtime(m_config.cache_dir()); + + const std::string& temp_dir = m_config.temporary_dir(); + if (!Stat::lstat(temp_dir)) { + return; + } + + Util::traverse(temp_dir, [now](const std::string& path, bool is_dir) { + if (is_dir) { + return; + } + const auto st = Stat::lstat(path, Stat::OnError::log); + if (st && st.mtime() + k_tempdir_cleanup_interval < now) { + Util::unlink_tmp(path); + } + }); +} + +nonstd::optional +PrimaryStorage::update_stats_and_maybe_move_cache_file( + const Digest& key, + const std::string& current_path, + const Counters& counter_updates, + const core::CacheEntryType type) +{ + if (counter_updates.all_zero()) { + return nonstd::nullopt; + } + + // Use stats file in the level one subdirectory for cache bookkeeping counters + // since cleanup is performed on level one. 
Use stats file in the level two + // subdirectory for other counters to reduce lock contention. + const bool use_stats_on_level_1 = + counter_updates.get(Statistic::cache_size_kibibyte) != 0 + || counter_updates.get(Statistic::files_in_cache) != 0; + std::string level_string = FMT("{:x}", key.bytes()[0] >> 4); + if (!use_stats_on_level_1) { + level_string += FMT("/{:x}", key.bytes()[0] & 0xF); + } + const auto stats_file = + FMT("{}/{}/stats", m_config.cache_dir(), level_string); + + const auto counters = + Statistics::update(stats_file, [&counter_updates](auto& cs) { + cs.increment(counter_updates); + }); + if (!counters) { + return nonstd::nullopt; + } + + if (use_stats_on_level_1) { + // Only consider moving the cache file to another level when we have read + // the level 1 stats file since it's only then we know the proper + // files_in_cache value. + const auto wanted_level = + calculate_wanted_cache_level(counters->get(Statistic::files_in_cache)); + const auto wanted_path = + Util::get_path_in_cache(m_config.cache_dir(), + wanted_level, + key.to_string() + suffix_from_type(type)); + if (current_path != wanted_path) { + Util::ensure_dir_exists(Util::dir_name(wanted_path)); + LOG("Moving {} to {}", current_path, wanted_path); + try { + Util::rename(current_path, wanted_path); + } catch (const Error&) { + // Two ccache processes may move the file at the same time, so failure + // to rename is OK. + } + } + } + return counters; +} + +} // namespace primary +} // namespace storage diff --git a/src/storage/primary/PrimaryStorage.hpp b/src/storage/primary/PrimaryStorage.hpp new file mode 100644 index 000000000..1bc97ab59 --- /dev/null +++ b/src/storage/primary/PrimaryStorage.hpp @@ -0,0 +1,102 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include +#include +#include + +#include + +class Config; +class Counters; + +namespace storage { +namespace primary { + +class PrimaryStorage +{ +public: + PrimaryStorage(const Config& config); + + void initialize(); + void finalize(); + + // Returns a path to a file containing the value. + nonstd::optional get(const Digest& key, + core::CacheEntryType type) const; + + nonstd::optional + put(const Digest& key, + core::CacheEntryType type, + const storage::CacheEntryWriter& entry_writer); + + void remove(const Digest& key, core::CacheEntryType type); + + void increment_statistic(Statistic statistic, int64_t value = 1); + + // Return a machine-readable string representing the final ccache result, or + // nullopt if there was no result. + nonstd::optional get_result_id() const; + + // Return a human-readable string representing the final ccache result, or + // nullopt if there was no result. + nonstd::optional get_result_message() const; + +private: + const Config& m_config; + + // Main statistics updates (result statistics and size/count change for result + // file) which get written into the statistics file belonging to the result + // file. 
+ Counters m_result_counter_updates; + + // Statistics updates (only for manifest size/count change) which get written + // into the statistics file belonging to the manifest. + Counters m_manifest_counter_updates; + + // The manifest and result keys and paths are stored by put() so that + // finalize() can use them to move the files in place. + nonstd::optional m_manifest_key; + nonstd::optional m_result_key; + std::string m_manifest_path; + std::string m_result_path; + + struct LookUpCacheFileResult + { + std::string path; + Stat stat; + uint8_t level; + }; + + LookUpCacheFileResult look_up_cache_file(const Digest& key, + core::CacheEntryType type) const; + + void clean_up_internal_tempdir(); + + nonstd::optional + update_stats_and_maybe_move_cache_file(const Digest& key, + const std::string& current_path, + const Counters& counter_updates, + core::CacheEntryType type); +}; + +} // namespace primary +} // namespace storage diff --git a/src/storage/types.hpp b/src/storage/types.hpp new file mode 100644 index 000000000..1a77f5f99 --- /dev/null +++ b/src/storage/types.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +// System headers +#include +#include +#include +// End of system headers + +namespace storage { + +using AttributeMap = + std::unordered_map; +using CacheEntryWriter = std::function; + +} // namespace storage diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 3401cd6f1..0ca3a4de3 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,6 @@ set( sources + ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Tokenizer.cpp ) diff --git a/src/util/file_utils.cpp b/src/util/file_utils.cpp new file mode 100644 index 000000000..a7a6ca9c3 --- /dev/null +++ b/src/util/file_utils.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "file_utils.hpp" + +#include +#include +#include + +namespace util { + +void +create_cachedir_tag(const std::string& dir) +{ + constexpr char cachedir_tag[] = + "Signature: 8a477f597d28d172789f06886806bc55\n" + "# This file is a cache directory tag created by ccache.\n" + "# For information about cache directory tags, see:\n" + "#\thttp://www.brynosaurus.com/cachedir/\n"; + + const std::string path = FMT("{}/CACHEDIR.TAG", dir); + const auto stat = Stat::stat(path); + if (stat) { + return; + } + try { + Util::write_file(path, cachedir_tag); + } catch (const Error& e) { + LOG("Failed to create {}: {}", path, e.what()); + } +} + +} // namespace util diff --git a/src/util/file_utils.hpp b/src/util/file_utils.hpp new file mode 100644 index 000000000..cf5bd2871 --- /dev/null +++ b/src/util/file_utils.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +// System headers +#include +// End of system headers + +namespace util { + +void create_cachedir_tag(const std::string& dir); + +} -- 2.47.3