From 5ceae21a9a43de511135d1a2a4a8d3244f866dd8 Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Tue, 20 Jul 2021 13:46:37 +0200 Subject: [PATCH] Move code for cleanup, compressing and similar to storage/primary --- src/CMakeLists.txt | 3 - src/Util.cpp | 47 ------ src/Util.hpp | 35 +---- src/ccache.cpp | 25 +-- src/cleanup.hpp | 43 ------ src/compress.hpp | 40 ----- src/storage/Storage.hpp | 3 +- src/storage/primary/CMakeLists.txt | 4 + src/{ => storage/primary}/CacheFile.cpp | 5 +- src/{ => storage/primary}/CacheFile.hpp | 4 +- src/storage/primary/PrimaryStorage.cpp | 12 +- src/storage/primary/PrimaryStorage.hpp | 29 +++- .../primary/PrimaryStorage_cleanup.cpp} | 95 ++++++------ .../primary/PrimaryStorage_compress.cpp} | 142 +++++++++--------- src/storage/primary/util.cpp | 75 +++++++++ src/storage/primary/util.hpp | 64 ++++++++ src/storage/secondary/FileStorage.hpp | 2 +- src/storage/secondary/HttpStorage.hpp | 2 +- src/storage/secondary/RedisStorage.hpp | 2 +- unittest/CMakeLists.txt | 1 + unittest/test_Util.cpp | 78 ---------- unittest/test_storage_primary_util.cpp | 120 +++++++++++++++ 22 files changed, 442 insertions(+), 389 deletions(-) delete mode 100644 src/cleanup.hpp delete mode 100644 src/compress.hpp rename src/{ => storage/primary}/CacheFile.cpp (96%) rename src/{ => storage/primary}/CacheFile.hpp (95%) rename src/{cleanup.cpp => storage/primary/PrimaryStorage_cleanup.cpp} (74%) rename src/{compress.cpp => storage/primary/PrimaryStorage_compress.cpp} (75%) create mode 100644 src/storage/primary/util.cpp create mode 100644 src/storage/primary/util.hpp create mode 100644 unittest/test_storage_primary_util.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f699f96e7..f2c243252 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,7 +4,6 @@ set( AtomicFile.cpp CacheEntryReader.cpp CacheEntryWriter.cpp - CacheFile.cpp Config.cpp Context.cpp Counters.cpp @@ -28,9 +27,7 @@ set( argprocessing.cpp assertions.cpp ccache.cpp - cleanup.cpp compopt.cpp - compress.cpp execute.cpp hashutil.cpp language.cpp diff --git a/src/Util.cpp b/src/Util.cpp index 2e3c210ac..d8b856572 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -514,22 +514,6 @@ fallocate(int fd, long new_size) #endif } -void -for_each_level_1_subdir(const std::string& cache_dir, - const SubdirVisitor& visitor, - const ProgressReceiver& progress_receiver) -{ - for (int i = 0; i <= 0xF; i++) { - double progress = 1.0 * i / 16; - progress_receiver(progress); - std::string subdir_path = FMT("{}/{:x}", cache_dir, i); - visitor(subdir_path, [&](double inner_progress) { - progress_receiver(progress + inner_progress / 16); - }); - } - progress_receiver(1.0); -} - std::string format_argv_for_logging(const char* const* argv) { @@ -662,37 +646,6 @@ get_extension(string_view path) } } -std::vector -get_level_1_files(const std::string& dir, - const ProgressReceiver& progress_receiver) -{ - std::vector files; - - if (!Stat::stat(dir)) { - return files; - } - - size_t level_2_directories = 0; - - Util::traverse(dir, [&](const std::string& path, bool is_dir) { - auto name = Util::base_name(path); - if (name == "CACHEDIR.TAG" || name == "stats" || name.starts_with(".nfs")) { - return; - } - - if (!is_dir) { - files.emplace_back(path); - } else if (path != dir - && path.find('/', dir.size() + 1) == std::string::npos) { - ++level_2_directories; - progress_receiver(level_2_directories / 16.0); - } - }); - - progress_receiver(1.0); - return files; -} - std::string get_home_directory() { diff --git a/src/Util.hpp b/src/Util.hpp index 42c2e5239..7a3d116e2 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -18,8 +18,7 @@ #pragma once -#include "CacheFile.hpp" - +#include #include #include "third_party/nonstd/optional.hpp" @@ -39,9 +38,6 @@ class Context; namespace Util { using DataReceiver = std::function; -using ProgressReceiver = std::function; -using SubdirVisitor = std::function; using TraverseVisitor = std::function; @@ -146,17 +142,6 @@ void ensure_dir_exists(nonstd::string_view dir); // Returns 0 on success, an error number otherwise. int fallocate(int fd, long new_size); -// Call a function for each subdir (0-9a-f) in the cache. -// -// Parameters: -// - cache_dir: Path to the cache directory. -// - visitor: Function to call with directory path and progress_receiver as -// arguments. -// - progress_receiver: Function that will be called for progress updates. -void for_each_level_1_subdir(const std::string& cache_dir, - const SubdirVisitor& visitor, - const ProgressReceiver& progress_receiver); - // Format `argv` as a simple string for logging purposes. That is, the result is // not intended to be machine parsable. `argv` must be terminated by a nullptr. std::string format_argv_for_logging(const char* const* argv); @@ -189,24 +174,6 @@ std::string get_apparent_cwd(const std::string& actual_cwd); // `path` has no file extension, an empty string_view is returned. nonstd::string_view get_extension(nonstd::string_view path); -// Get a list of files in a level 1 subdirectory of the cache. -// -// The function works under the assumption that directory entries with one -// character names (except ".") are subdirectories and that there are no other -// subdirectories. -// -// Files ignored: -// - CACHEDIR.TAG -// - stats -// - .nfs* (temporary NFS files that may be left for open but deleted files). -// -// Parameters: -// - dir: The directory to traverse recursively. -// - progress_receiver: Function that will be called for progress updates. -std::vector -get_level_1_files(const std::string& dir, - const ProgressReceiver& progress_receiver); - // Return the current user's home directory, or throw `Fatal` if it can't // be determined. std::string get_home_directory(); diff --git a/src/ccache.cpp b/src/ccache.cpp index 74197d227..2aba2e292 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -45,9 +45,7 @@ #include "Util.hpp" #include "Win32Util.hpp" #include "argprocessing.hpp" -#include "cleanup.hpp" #include "compopt.hpp" -#include "compress.hpp" #include "execute.hpp" #include "fmtmacros.hpp" #include "hashutil.hpp" @@ -2319,8 +2317,8 @@ handle_main_options(int argc, const char* const* argv) case EVICT_OLDER_THAN: { auto seconds = Util::parse_duration(arg); ProgressBar progress_bar("Evicting..."); - clean_old( - ctx, [&](double progress) { progress_bar.update(progress); }, seconds); + ctx.storage.primary().clean_old( + [&](double progress) { progress_bar.update(progress); }, seconds); if (isatty(STDOUT_FILENO)) { PRINT_RAW(stdout, "\n"); } @@ -2361,8 +2359,8 @@ handle_main_options(int argc, const char* const* argv) case 'c': // --cleanup { ProgressBar progress_bar("Cleaning..."); - clean_up_all(ctx.config, - [&](double progress) { progress_bar.update(progress); }); + ctx.storage.primary().clean_all( + [&](double progress) { progress_bar.update(progress); }); if (isatty(STDOUT_FILENO)) { PRINT_RAW(stdout, "\n"); } @@ -2372,10 +2370,14 @@ handle_main_options(int argc, const char* const* argv) case 'C': // --clear { ProgressBar progress_bar("Clearing..."); - wipe_all(ctx, [&](double progress) { progress_bar.update(progress); }); + ctx.storage.primary().wipe_all( + [&](double progress) { progress_bar.update(progress); }); if (isatty(STDOUT_FILENO)) { PRINT_RAW(stdout, "\n"); } +#ifdef INODE_CACHE_SUPPORTED + ctx.inode_cache.drop(); +#endif break; } @@ -2466,8 +2468,8 @@ handle_main_options(int argc, const char* const* argv) case 'x': // --show-compression { ProgressBar progress_bar("Scanning..."); - compress_stats(ctx.config, - [&](double progress) { progress_bar.update(progress); }); + ctx.storage.primary().print_compression_statistics( + [&](double progress) { progress_bar.update(progress); }); break; } @@ -2482,9 +2484,8 @@ handle_main_options(int argc, const char* const* argv) } ProgressBar progress_bar("Recompressing..."); - compress_recompress(ctx, wanted_level, [&](double progress) { - progress_bar.update(progress); - }); + ctx.storage.primary().recompress( + wanted_level, [&](double progress) { progress_bar.update(progress); }); break; } diff --git a/src/cleanup.hpp b/src/cleanup.hpp deleted file mode 100644 index 97d5cb75c..000000000 --- a/src/cleanup.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#pragma once - -#include "Util.hpp" - -#include -#include - -class Config; -class Context; - -void clean_old(const Context& ctx, - const Util::ProgressReceiver& progress_receiver, - uint64_t max_age); - -void clean_up_dir(const std::string& subdir, - uint64_t max_size, - uint64_t max_files, - uint64_t max_age, - const Util::ProgressReceiver& progress_receiver); - -void clean_up_all(const Config& config, - const Util::ProgressReceiver& progress_receiver); - -void wipe_all(const Context& ctx, - const Util::ProgressReceiver& progress_receiver); diff --git a/src/compress.hpp b/src/compress.hpp deleted file mode 100644 index dd1670b18..000000000 --- a/src/compress.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (C) 2019-2021 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#pragma once - -#include "Util.hpp" - -#include "third_party/nonstd/optional.hpp" - -class Config; -class Context; - -void compress_stats(const Config& config, - const Util::ProgressReceiver& progress_receiver); - -// Recompress the cache. -// -// Arguments: -// - ctx: The context. -// - level: Target compression level (positive or negative value for actual -// level, 0 for default level and nonstd::nullopt for no compression). -// - progress_receiver: Function that will be called for progress updates. -void compress_recompress(Context& ctx, - nonstd::optional level, - const Util::ProgressReceiver& progress_receiver); diff --git a/src/storage/Storage.hpp b/src/storage/Storage.hpp index 511a2b0dc..e3d10c696 100644 --- a/src/storage/Storage.hpp +++ b/src/storage/Storage.hpp @@ -18,10 +18,9 @@ #pragma once -#include "types.hpp" - #include #include +#include #include diff --git a/src/storage/primary/CMakeLists.txt b/src/storage/primary/CMakeLists.txt index f74323c6e..817da2c21 100644 --- a/src/storage/primary/CMakeLists.txt +++ b/src/storage/primary/CMakeLists.txt @@ -1,6 +1,10 @@ set( sources + ${CMAKE_CURRENT_SOURCE_DIR}/CacheFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/PrimaryStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/PrimaryStorage_cleanup.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/PrimaryStorage_compress.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/util.cpp ) target_sources(ccache_lib PRIVATE ${sources}) diff --git a/src/CacheFile.cpp b/src/storage/primary/CacheFile.cpp similarity index 96% rename from src/CacheFile.cpp rename to src/storage/primary/CacheFile.cpp index 5d28d2937..8ada17ff7 100644 --- a/src/CacheFile.cpp +++ b/src/storage/primary/CacheFile.cpp @@ -18,9 +18,8 @@ #include "CacheFile.hpp" -#include "Manifest.hpp" -#include "Result.hpp" - +#include +#include #include const Stat& diff --git a/src/CacheFile.hpp b/src/storage/primary/CacheFile.hpp similarity index 95% rename from src/CacheFile.hpp rename to src/storage/primary/CacheFile.hpp index 8993f5bec..14064fec4 100644 --- a/src/CacheFile.hpp +++ b/src/storage/primary/CacheFile.hpp @@ -18,9 +18,9 @@ #pragma once -#include "Stat.hpp" +#include -#include "third_party/nonstd/optional.hpp" +#include #include diff --git a/src/storage/primary/PrimaryStorage.cpp b/src/storage/primary/PrimaryStorage.cpp index c4e29cf13..dcacb333f 100644 --- a/src/storage/primary/PrimaryStorage.cpp +++ b/src/storage/primary/PrimaryStorage.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -98,11 +97,11 @@ PrimaryStorage::PrimaryStorage(const Config& config) : m_config(config) void PrimaryStorage::initialize() { - MTR_BEGIN("primary_storage", "clean_up_internal_tempdir"); + MTR_BEGIN("primary_storage", "clean_internal_tempdir"); if (m_config.temporary_dir() == m_config.cache_dir() + "/tmp") { - clean_up_internal_tempdir(); + clean_internal_tempdir(); } - MTR_END("primary_storage", "clean_up_internal_tempdir"); + MTR_END("primary_storage", "clean_internal_tempdir"); } void @@ -174,8 +173,7 @@ PrimaryStorage::finalize() const uint64_t max_size = round(m_config.max_size() * factor); const uint32_t max_files = round(m_config.max_files() * factor); const time_t max_age = 0; - clean_up_dir( - subdir, max_size, max_files, max_age, [](double /*progress*/) {}); + clean_dir(subdir, max_size, max_files, max_age, [](double /*progress*/) {}); } } @@ -303,7 +301,7 @@ PrimaryStorage::look_up_cache_file(const Digest& key, } void -PrimaryStorage::clean_up_internal_tempdir() +PrimaryStorage::clean_internal_tempdir() { const time_t now = time(nullptr); const auto dir_st = Stat::stat(m_config.cache_dir(), Stat::OnError::log); diff --git a/src/storage/primary/PrimaryStorage.hpp b/src/storage/primary/PrimaryStorage.hpp index e1d5620d9..3263fdc2c 100644 --- a/src/storage/primary/PrimaryStorage.hpp +++ b/src/storage/primary/PrimaryStorage.hpp @@ -18,6 +18,8 @@ #pragma once +#include "util.hpp" + #include #include #include @@ -39,6 +41,8 @@ public: void initialize(); void finalize(); + // --- Cache entry handling --- + // Returns a path to a file containing the value. nonstd::optional get(const Digest& key, core::CacheEntryType type) const; @@ -49,6 +53,8 @@ public: void remove(const Digest& key, core::CacheEntryType type); + // --- Statistics --- + void increment_statistic(Statistic statistic, int64_t value = 1); // Return a machine-readable string representing the final ccache result, or @@ -59,6 +65,27 @@ public: // nullopt if there was no result. nonstd::optional get_result_message() const; + // --- Cleanup --- + + void clean_old(const ProgressReceiver& progress_receiver, uint64_t max_age); + + void clean_dir(const std::string& subdir, + uint64_t max_size, + uint64_t max_files, + uint64_t max_age, + const ProgressReceiver& progress_receiver); + + void clean_all(const ProgressReceiver& progress_receiver); + + void wipe_all(const ProgressReceiver& progress_receiver); + + // --- Compression --- + + void print_compression_statistics(const ProgressReceiver& progress_receiver); + + void recompress(nonstd::optional level, + const ProgressReceiver& progress_receiver); + private: const Config& m_config; @@ -88,7 +115,7 @@ private: LookUpCacheFileResult look_up_cache_file(const Digest& key, core::CacheEntryType type) const; - void clean_up_internal_tempdir(); + void clean_internal_tempdir(); nonstd::optional update_stats_and_maybe_move_cache_file(const Digest& key, diff --git a/src/cleanup.cpp b/src/storage/primary/PrimaryStorage_cleanup.cpp similarity index 74% rename from src/cleanup.cpp rename to src/storage/primary/PrimaryStorage_cleanup.cpp index 817c6653d..f39c96bb2 100644 --- a/src/cleanup.cpp +++ b/src/storage/primary/PrimaryStorage_cleanup.cpp @@ -17,30 +17,33 @@ // this program; if not, write to the Free Software Foundation, Inc., 51 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -#include "cleanup.hpp" - -#include "CacheFile.hpp" -#include "Config.hpp" -#include "Context.hpp" -#include "Logging.hpp" -#include "Statistics.hpp" -#include "Util.hpp" - +#include "PrimaryStorage.hpp" + +#include +#include +#include +#include +#include +#include +#include #include #ifdef INODE_CACHE_SUPPORTED -# include "InodeCache.hpp" +# include #endif #include +namespace storage { +namespace primary { + static void delete_file(const std::string& path, - uint64_t size, + const uint64_t size, uint64_t* cache_size, uint64_t* files_in_cache) { - bool deleted = Util::unlink_safe(path, Util::UnlinkLog::ignore_failure); + const bool deleted = Util::unlink_safe(path, Util::UnlinkLog::ignore_failure); if (!deleted && errno != ENOENT && errno != ESTALE) { LOG("Failed to unlink {} ({})", path, strerror(errno)); } else if (cache_size && files_in_cache) { @@ -55,9 +58,9 @@ delete_file(const std::string& path, static void update_counters(const std::string& dir, - uint64_t files_in_cache, - uint64_t cache_size, - bool cleanup_performed) + const uint64_t files_in_cache, + const uint64_t cache_size, + const bool cleanup_performed) { const std::string stats_file = dir + "/stats"; Statistics::update(stats_file, [=](auto& cs) { @@ -70,29 +73,29 @@ update_counters(const std::string& dir, } void -clean_old(const Context& ctx, - const Util::ProgressReceiver& progress_receiver, - uint64_t max_age) +PrimaryStorage::clean_old(const ProgressReceiver& progress_receiver, + const uint64_t max_age) { - Util::for_each_level_1_subdir( - ctx.config.cache_dir(), - [&](const auto& subdir, const auto& sub_progress_receiver) { - clean_up_dir(subdir, 0, 0, max_age, sub_progress_receiver); + for_each_level_1_subdir( + m_config.cache_dir(), + [&](const std::string& subdir, + const ProgressReceiver& sub_progress_receiver) { + clean_dir(subdir, 0, 0, max_age, sub_progress_receiver); }, progress_receiver); } // Clean up one cache subdirectory. void -clean_up_dir(const std::string& subdir, - uint64_t max_size, - uint64_t max_files, - uint64_t max_age, - const Util::ProgressReceiver& progress_receiver) +PrimaryStorage::clean_dir(const std::string& subdir, + const uint64_t max_size, + const uint64_t max_files, + const uint64_t max_age, + const ProgressReceiver& progress_receiver) { LOG("Cleaning up cache directory {}", subdir); - std::vector files = Util::get_level_1_files( + std::vector files = get_level_1_files( subdir, [&](double progress) { progress_receiver(progress / 3); }); uint64_t cache_size = 0; @@ -181,29 +184,28 @@ clean_up_dir(const std::string& subdir, // Clean up all cache subdirectories. void -clean_up_all(const Config& config, - const Util::ProgressReceiver& progress_receiver) +PrimaryStorage::clean_all(const ProgressReceiver& progress_receiver) { - Util::for_each_level_1_subdir( - config.cache_dir(), - [&](const auto& subdir, const auto& sub_progress_receiver) { - clean_up_dir(subdir, - config.max_size() / 16, - config.max_files() / 16, - 0, - sub_progress_receiver); + for_each_level_1_subdir( + m_config.cache_dir(), + [&](const std::string& subdir, + const ProgressReceiver& sub_progress_receiver) { + clean_dir(subdir, + m_config.max_size() / 16, + m_config.max_files() / 16, + 0, + sub_progress_receiver); }, progress_receiver); } // Wipe one cache subdirectory. static void -wipe_dir(const std::string& subdir, - const Util::ProgressReceiver& progress_receiver) +wipe_dir(const std::string& subdir, const ProgressReceiver& progress_receiver) { LOG("Clearing out cache directory {}", subdir); - const std::vector files = Util::get_level_1_files( + const std::vector files = get_level_1_files( subdir, [&](double progress) { progress_receiver(progress / 2); }); for (size_t i = 0; i < files.size(); ++i) { @@ -220,11 +222,10 @@ wipe_dir(const std::string& subdir, // Wipe all cached files in all subdirectories. void -wipe_all(const Context& ctx, const Util::ProgressReceiver& progress_receiver) +PrimaryStorage::wipe_all(const ProgressReceiver& progress_receiver) { - Util::for_each_level_1_subdir( - ctx.config.cache_dir(), wipe_dir, progress_receiver); -#ifdef INODE_CACHE_SUPPORTED - ctx.inode_cache.drop(); -#endif + for_each_level_1_subdir(m_config.cache_dir(), wipe_dir, progress_receiver); } + +} // namespace primary +} // namespace storage diff --git a/src/compress.cpp b/src/storage/primary/PrimaryStorage_compress.cpp similarity index 75% rename from src/compress.cpp rename to src/storage/primary/PrimaryStorage_compress.cpp index 9693ee71b..40ac213e8 100644 --- a/src/compress.cpp +++ b/src/storage/primary/PrimaryStorage_compress.cpp @@ -16,27 +16,26 @@ // this program; if not, write to the Free Software Foundation, Inc., 51 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -#include "compress.hpp" - -#include "AtomicFile.hpp" -#include "CacheEntryReader.hpp" -#include "CacheEntryWriter.hpp" -#include "Context.hpp" -#include "File.hpp" -#include "Logging.hpp" -#include "Manifest.hpp" -#include "Result.hpp" -#include "Statistics.hpp" -#include "ThreadPool.hpp" -#include "assertions.hpp" -#include "fmtmacros.hpp" - +#include "PrimaryStorage.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include -#include "third_party/fmt/core.h" +#include #ifdef HAVE_UNISTD_H # include @@ -46,7 +45,8 @@ #include #include -using nonstd::optional; +namespace storage { +namespace primary { namespace { @@ -71,10 +71,10 @@ private: }; void -RecompressionStatistics::update(uint64_t content_size, - uint64_t old_size, - uint64_t new_size, - uint64_t incompressible_size) +RecompressionStatistics::update(const uint64_t content_size, + const uint64_t old_size, + const uint64_t new_size, + const uint64_t incompressible_size) { std::unique_lock lock(m_mutex); m_incompressible_size += incompressible_size; @@ -111,8 +111,10 @@ RecompressionStatistics::incompressible_size() const return m_incompressible_size; } -File -open_file(const std::string& path, const char* mode) +} // namespace + +static File +open_file(const std::string& path, const char* const mode) { File f(path, mode); if (!f) { @@ -122,8 +124,8 @@ open_file(const std::string& path, const char* mode) return f; } -std::unique_ptr -create_reader(const CacheFile& cache_file, FILE* stream) +static std::unique_ptr +create_reader(const CacheFile& cache_file, FILE* const stream) { if (cache_file.type() == CacheFile::Type::unknown) { throw core::Error("unknown file type for {}", cache_file.path()); @@ -145,11 +147,11 @@ create_reader(const CacheFile& cache_file, FILE* stream) ASSERT(false); } -std::unique_ptr -create_writer(FILE* stream, +static std::unique_ptr +create_writer(FILE* const stream, const CacheEntryReader& reader, - compression::Type compression_type, - int8_t compression_level) + const compression::Type compression_type, + const int8_t compression_level) { return std::make_unique(stream, reader.magic(), @@ -159,18 +161,18 @@ create_writer(FILE* stream, reader.payload_size()); } -void +static void recompress_file(RecompressionStatistics& statistics, const std::string& stats_file, const CacheFile& cache_file, - optional level) + const nonstd::optional level) { auto file = open_file(cache_file.path(), "rb"); auto reader = create_reader(cache_file, file.get()); - auto old_stat = Stat::stat(cache_file.path(), Stat::OnError::log); - uint64_t content_size = reader->content_size(); - int8_t wanted_level = + const auto old_stat = Stat::stat(cache_file.path(), Stat::OnError::log); + const uint64_t content_size = reader->content_size(); + const int8_t wanted_level = level ? (*level == 0 ? compression::ZstdCompressor::default_compression_level : *level) @@ -205,7 +207,7 @@ recompress_file(RecompressionStatistics& statistics, file.close(); atomic_new_file.commit(); - auto new_stat = Stat::stat(cache_file.path(), Stat::OnError::log); + const auto new_stat = Stat::stat(cache_file.path(), Stat::OnError::log); Statistics::update(stats_file, [=](auto& cs) { cs.increment(Statistic::cache_size_kibibyte, @@ -217,21 +219,19 @@ recompress_file(RecompressionStatistics& statistics, LOG("Recompression of {} done", cache_file.path()); } -} // namespace - void -compress_stats(const Config& config, - const Util::ProgressReceiver& progress_receiver) +PrimaryStorage::print_compression_statistics( + const ProgressReceiver& progress_receiver) { uint64_t on_disk_size = 0; uint64_t compr_size = 0; uint64_t content_size = 0; uint64_t incompr_size = 0; - Util::for_each_level_1_subdir( - config.cache_dir(), + for_each_level_1_subdir( + m_config.cache_dir(), [&](const auto& subdir, const auto& sub_progress_receiver) { - const std::vector files = Util::get_level_1_files( + const std::vector files = get_level_1_files( subdir, [&](double progress) { sub_progress_receiver(progress / 2); }); for (size_t i = 0; i < files.size(); ++i) { @@ -256,16 +256,20 @@ compress_stats(const Config& config, PRINT_RAW(stdout, "\n\n"); } - double ratio = + const double ratio = compr_size > 0 ? static_cast(content_size) / compr_size : 0.0; - double savings = ratio > 0.0 ? 100.0 - (100.0 / ratio) : 0.0; + const double savings = ratio > 0.0 ? 100.0 - (100.0 / ratio) : 0.0; - std::string on_disk_size_str = Util::format_human_readable_size(on_disk_size); - std::string cache_size_str = + const std::string on_disk_size_str = + Util::format_human_readable_size(on_disk_size); + const std::string cache_size_str = Util::format_human_readable_size(compr_size + incompr_size); - std::string compr_size_str = Util::format_human_readable_size(compr_size); - std::string content_size_str = Util::format_human_readable_size(content_size); - std::string incompr_size_str = Util::format_human_readable_size(incompr_size); + const std::string compr_size_str = + Util::format_human_readable_size(compr_size); + const std::string content_size_str = + Util::format_human_readable_size(content_size); + const std::string incompr_size_str = + Util::format_human_readable_size(incompr_size); PRINT(stdout, "Total data: {:>8s} ({} disk blocks)\n", @@ -284,20 +288,19 @@ compress_stats(const Config& config, } void -compress_recompress(Context& ctx, - optional level, - const Util::ProgressReceiver& progress_receiver) +PrimaryStorage::recompress(const nonstd::optional level, + const ProgressReceiver& progress_receiver) { const size_t threads = std::thread::hardware_concurrency(); const size_t read_ahead = 2 * threads; ThreadPool thread_pool(threads, read_ahead); RecompressionStatistics statistics; - Util::for_each_level_1_subdir( - ctx.config.cache_dir(), + for_each_level_1_subdir( + m_config.cache_dir(), [&](const auto& subdir, const auto& sub_progress_receiver) { std::vector files = - Util::get_level_1_files(subdir, [&](double progress) { + get_level_1_files(subdir, [&](double progress) { sub_progress_receiver(0.1 * progress); }); @@ -322,7 +325,7 @@ compress_recompress(Context& ctx, } if (util::ends_with(subdir, "f")) { - // Wait here instead of after Util::for_each_level_1_subdir to avoid + // Wait here instead of after for_each_level_1_subdir to avoid // updating the progress bar to 100% before all work is done. thread_pool.shut_down(); } @@ -333,28 +336,30 @@ compress_recompress(Context& ctx, PRINT_RAW(stdout, "\n\n"); } - double old_ratio = + const double old_ratio = statistics.old_size() > 0 ? static_cast(statistics.content_size()) / statistics.old_size() : 0.0; - double old_savings = old_ratio > 0.0 ? 100.0 - (100.0 / old_ratio) : 0.0; - double new_ratio = + const double old_savings = + old_ratio > 0.0 ? 100.0 - (100.0 / old_ratio) : 0.0; + const double new_ratio = statistics.new_size() > 0 ? static_cast(statistics.content_size()) / statistics.new_size() : 0.0; - double new_savings = new_ratio > 0.0 ? 100.0 - (100.0 / new_ratio) : 0.0; - int64_t size_difference = static_cast(statistics.new_size()) - - static_cast(statistics.old_size()); + const double new_savings = + new_ratio > 0.0 ? 100.0 - (100.0 / new_ratio) : 0.0; + const int64_t size_difference = static_cast(statistics.new_size()) + - static_cast(statistics.old_size()); - std::string old_compr_size_str = + const std::string old_compr_size_str = Util::format_human_readable_size(statistics.old_size()); - std::string new_compr_size_str = + const std::string new_compr_size_str = Util::format_human_readable_size(statistics.new_size()); - std::string content_size_str = + const std::string content_size_str = Util::format_human_readable_size(statistics.content_size()); - std::string incompr_size_str = + const std::string incompr_size_str = Util::format_human_readable_size(statistics.incompressible_size()); - std::string size_difference_str = + const std::string size_difference_str = FMT("{}{}", size_difference < 0 ? "-" : (size_difference > 0 ? "+" : " "), Util::format_human_readable_size( @@ -379,3 +384,6 @@ compress_recompress(Context& ctx, new_savings); PRINT(stdout, "Size change: {:>9s}\n", size_difference_str); } + +} // namespace primary +} // namespace storage diff --git a/src/storage/primary/util.cpp b/src/storage/primary/util.cpp new file mode 100644 index 000000000..f50b4defd --- /dev/null +++ b/src/storage/primary/util.cpp @@ -0,0 +1,75 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "util.hpp" + +#include +#include + +namespace storage { +namespace primary { + +void +for_each_level_1_subdir(const std::string& cache_dir, + const SubdirVisitor& visitor, + const ProgressReceiver& progress_receiver) +{ + for (int i = 0; i <= 0xF; i++) { + double progress = 1.0 * i / 16; + progress_receiver(progress); + std::string subdir_path = FMT("{}/{:x}", cache_dir, i); + visitor(subdir_path, [&](double inner_progress) { + progress_receiver(progress + inner_progress / 16); + }); + } + progress_receiver(1.0); +} + +std::vector +get_level_1_files(const std::string& dir, + const ProgressReceiver& progress_receiver) +{ + std::vector files; + + if (!Stat::stat(dir)) { + return files; + } + + size_t level_2_directories = 0; + + Util::traverse(dir, [&](const std::string& path, bool is_dir) { + auto name = Util::base_name(path); + if (name == "CACHEDIR.TAG" || name == "stats" || name.starts_with(".nfs")) { + return; + } + + if (!is_dir) { + files.emplace_back(path); + } else if (path != dir + && path.find('/', dir.size() + 1) == std::string::npos) { + ++level_2_directories; + progress_receiver(level_2_directories / 16.0); + } + }); + + progress_receiver(1.0); + return files; +} + +} // namespace primary +} // namespace storage diff --git a/src/storage/primary/util.hpp b/src/storage/primary/util.hpp new file mode 100644 index 000000000..f4e120117 --- /dev/null +++ b/src/storage/primary/util.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include + +#include +#include +#include + +namespace storage { +namespace primary { + +using ProgressReceiver = std::function; +using SubdirVisitor = std::function; + +// Call a function for each subdir (0-9a-f) in the cache. +// +// Parameters: +// - cache_dir: Path to the cache directory. +// - visitor: Function to call with directory path and progress_receiver as +// arguments. +// - progress_receiver: Function that will be called for progress updates. +void for_each_level_1_subdir(const std::string& cache_dir, + const SubdirVisitor& visitor, + const ProgressReceiver& progress_receiver); + +// Get a list of files in a level 1 subdirectory of the cache. +// +// The function works under the assumption that directory entries with one +// character names (except ".") are subdirectories and that there are no other +// subdirectories. +// +// Files ignored: +// - CACHEDIR.TAG +// - stats +// - .nfs* (temporary NFS files that may be left for open but deleted files). +// +// Parameters: +// - dir: The directory to traverse recursively. +// - progress_receiver: Function that will be called for progress updates. +std::vector +get_level_1_files(const std::string& dir, + const ProgressReceiver& progress_receiver); + +} // namespace primary +} // namespace storage diff --git a/src/storage/secondary/FileStorage.hpp b/src/storage/secondary/FileStorage.hpp index 77667a9d4..771941f96 100644 --- a/src/storage/secondary/FileStorage.hpp +++ b/src/storage/secondary/FileStorage.hpp @@ -18,7 +18,7 @@ #pragma once -#include "SecondaryStorage.hpp" +#include namespace storage { namespace secondary { diff --git a/src/storage/secondary/HttpStorage.hpp b/src/storage/secondary/HttpStorage.hpp index 63c908666..60c1354e1 100644 --- a/src/storage/secondary/HttpStorage.hpp +++ b/src/storage/secondary/HttpStorage.hpp @@ -18,7 +18,7 @@ #pragma once -#include "SecondaryStorage.hpp" +#include namespace storage { namespace secondary { diff --git a/src/storage/secondary/RedisStorage.hpp b/src/storage/secondary/RedisStorage.hpp index 352e54b43..98794fa2d 100644 --- a/src/storage/secondary/RedisStorage.hpp +++ b/src/storage/secondary/RedisStorage.hpp @@ -18,7 +18,7 @@ #pragma once -#include "SecondaryStorage.hpp" +#include namespace storage { namespace secondary { diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index c9f129cd1..ada8b6887 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -21,6 +21,7 @@ set( test_compopt.cpp test_compression_types.cpp test_hashutil.cpp + test_storage_primary_util.cpp test_util_Tokenizer.cpp test_util_expected.cpp test_util_path.cpp diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp index bb0dc6a1b..afe7e1cc2 100644 --- a/unittest/test_Util.cpp +++ b/unittest/test_Util.cpp @@ -235,35 +235,6 @@ TEST_CASE("Util::fallocate") CHECK(Stat::stat(filename).size() == 20000); } -TEST_CASE("Util::for_each_level_1_subdir") -{ - std::vector actual; - Util::for_each_level_1_subdir( - "cache_dir", - [&](const auto& subdir, const auto&) { actual.push_back(subdir); }, - [](double) {}); - - std::vector expected = { - "cache_dir/0", - "cache_dir/1", - "cache_dir/2", - "cache_dir/3", - "cache_dir/4", - "cache_dir/5", - "cache_dir/6", - "cache_dir/7", - "cache_dir/8", - "cache_dir/9", - "cache_dir/a", - "cache_dir/b", - "cache_dir/c", - "cache_dir/d", - "cache_dir/e", - "cache_dir/f", - }; - CHECK(actual == expected); -} - TEST_CASE("Util::format_argv_for_logging") { const char* argv_0[] = {nullptr}; @@ -352,55 +323,6 @@ os_path(std::string path) return path; } -TEST_CASE("Util::get_level_1_files") -{ - TestContext test_context; - - Util::create_dir("e/m/p/t/y"); - - Util::create_dir("0/1"); - Util::create_dir("0/f/c"); - Util::write_file("0/file_a", ""); - Util::write_file("0/1/file_b", "1"); - Util::write_file("0/1/file_c", "12"); - Util::write_file("0/f/c/file_d", "123"); - - auto null_receiver = [](double) {}; - - SUBCASE("nonexistent subdirectory") - { - const auto files = Util::get_level_1_files("2", null_receiver); - CHECK(files.empty()); - } - - SUBCASE("empty subdirectory") - { - const auto files = Util::get_level_1_files("e", null_receiver); - CHECK(files.empty()); - } - - SUBCASE("simple case") - { - auto files = Util::get_level_1_files("0", null_receiver); - REQUIRE(files.size() == 4); - - // Files within a level are in arbitrary order, sort them to be able to - // verify them. - std::sort(files.begin(), files.end(), [](const auto& f1, const auto& f2) { - return f1.path() < f2.path(); - }); - - CHECK(files[0].path() == os_path("0/1/file_b")); - CHECK(files[0].lstat().size() == 1); - CHECK(files[1].path() == os_path("0/1/file_c")); - CHECK(files[1].lstat().size() == 2); - CHECK(files[2].path() == os_path("0/f/c/file_d")); - CHECK(files[2].lstat().size() == 3); - CHECK(files[3].path() == os_path("0/file_a")); - CHECK(files[3].lstat().size() == 0); - } -} - TEST_CASE("Util::get_relative_path") { #ifdef _WIN32 diff --git a/unittest/test_storage_primary_util.cpp b/unittest/test_storage_primary_util.cpp new file mode 100644 index 000000000..b847075d9 --- /dev/null +++ b/unittest/test_storage_primary_util.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "TestUtil.hpp" + +#include +#include + +#include + +#include + +using TestUtil::TestContext; + +static inline std::string +os_path(std::string path) +{ +#if defined(_WIN32) && !defined(HAVE_DIRENT_H) + std::replace(path.begin(), path.end(), '/', '\\'); +#endif + + return path; +} + +TEST_SUITE_BEGIN("storage::primary::util"); + +TEST_CASE("storage::primary::for_each_level_1_subdir") +{ + std::vector actual; + storage::primary::for_each_level_1_subdir( + "cache_dir", + [&](const auto& subdir, const auto&) { actual.push_back(subdir); }, + [](double) {}); + + std::vector expected = { + "cache_dir/0", + "cache_dir/1", + "cache_dir/2", + "cache_dir/3", + "cache_dir/4", + "cache_dir/5", + "cache_dir/6", + "cache_dir/7", + "cache_dir/8", + "cache_dir/9", + "cache_dir/a", + "cache_dir/b", + "cache_dir/c", + "cache_dir/d", + "cache_dir/e", + "cache_dir/f", + }; + CHECK(actual == expected); +} + +TEST_CASE("storage::primary::get_level_1_files") +{ + TestContext test_context; + + Util::create_dir("e/m/p/t/y"); + + Util::create_dir("0/1"); + Util::create_dir("0/f/c"); + Util::write_file("0/file_a", ""); + Util::write_file("0/1/file_b", "1"); + Util::write_file("0/1/file_c", "12"); + Util::write_file("0/f/c/file_d", "123"); + + auto null_receiver = [](double) {}; + + SUBCASE("nonexistent subdirectory") + { + const auto files = storage::primary::get_level_1_files("2", null_receiver); + CHECK(files.empty()); + } + + SUBCASE("empty subdirectory") + { + const auto files = storage::primary::get_level_1_files("e", null_receiver); + CHECK(files.empty()); + } + + SUBCASE("simple case") + { + auto files = storage::primary::get_level_1_files("0", null_receiver); + REQUIRE(files.size() == 4); + + // Files within a level are in arbitrary order, sort them to be able to + // verify them. + std::sort(files.begin(), files.end(), [](const auto& f1, const auto& f2) { + return f1.path() < f2.path(); + }); + + CHECK(files[0].path() == os_path("0/1/file_b")); + CHECK(files[0].lstat().size() == 1); + CHECK(files[1].path() == os_path("0/1/file_c")); + CHECK(files[1].lstat().size() == 2); + CHECK(files[2].path() == os_path("0/f/c/file_d")); + CHECK(files[2].lstat().size() == 3); + CHECK(files[3].path() == os_path("0/file_a")); + CHECK(files[3].lstat().size() == 0); + } +} + +TEST_SUITE_END(); -- 2.47.3