From: Joel Rosdahl Date: Sun, 6 Oct 2019 21:08:00 +0000 (+0200) Subject: Add -X/--recompress option X-Git-Tag: v4.0~753 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d8e2ab9a1a37a26149444f4c29ca632cbb8ce0e0;p=thirdparty%2Fccache.git Add -X/--recompress option --- diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 5304dd15e..6f05adeca 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -173,6 +173,17 @@ compiler options apply and you should refer to the compiler's documentation. <<_cache_compression,CACHE COMPRESSION>> for more information. This can potentially take a long time since all files in the cache need to be visited. +*`-X, --recompress`*=_LEVEL_:: + + Recompress the cache using compression level _LEVEL_. The level can be an + integer, with the same semantics as the + <> configuration setting, or + the special value *uncompressed* for no compression. See + <<_cache_compression,CACHE COMPRESSION>> for more information. This can + potentially take a long time since all files in the cache need to be + visited. Only files that are currently compressed with a different level + than _LEVEL_ will be recompressed. + *`-s, --show-stats`*:: Print a summary of configuration and statistics counters in human-readable @@ -389,9 +400,9 @@ Semantics of *compression_level*: *> 0*:: A positive value corresponds to normal Zstandard compression levels. Lower levels (e.g. *1*) mean faster compression but worse compression ratio. - Higher levels (e.g. *19*) mean slower compression but better compression - ratio. The maximum possible value depends on the libzstd version. - Decompression speed is essentially the same for all levels. + Higher levels (e.g. *19*) mean slower compression but better + compression ratio. The maximum possible value depends on the libzstd + version. Decompression speed is essentially the same for all levels. *< 0*:: A negative value corresponds to Zstandard's “ultra-fast” compression levels, which are even faster than level 1 but less good compression @@ -763,7 +774,7 @@ Cache compression ----------------- ccache will by default compress all data it puts into the cache using the -compression algorithm Zstandard (zstd) using compression level 1. The algorithm +compression algorithm Zstandard (zstd) using compression level -1. The algorithm is fast enough that there should be little reason to turn off compression to gain performance. One exception is if the cache is located on a compressed file system, in which case the compression performed by ccache of course is @@ -771,14 +782,14 @@ redundant. See the documentation for the <> and <> settings for more information. -You can use *ccache -x/--show-compression* to print information related to +You can use the *-x/--show-compression* option to print information related to compression. Example: ------------------------------------------------------------------------------- Total data: 14.8 GB (16.0 GB disk blocks) Compressible data: 11.3 GB (30.6% of original size) - Original size: 36.9 GB - - Compression ratio: 3.267 x (69.4% space savings) + - Compression ratio: 3.267 x (69.4% space savings) Incompressible data: 3.5 GB ------------------------------------------------------------------------------- @@ -795,6 +806,16 @@ Notes: * The compression ratio is affected by the <> setting. +The cache data can also be recompressed to another compression level (or made +uncompressed) with the *-X/--recompress* option.
If you choose to disable +compression by default or to use a low compression level, you can (re)compress +newly cached data with a higher compression level after the build or at another +time when there are more CPU cycles available, for instance every night. Full +recompression potentially takes a lot of time, but only files that are +currently compressed with a different level than the target level will be +recompressed. + + Cache statistics ---------------- diff --git a/doc/NEWS.adoc b/doc/NEWS.adoc index 52d0b8ad8..4f4acad16 100644 --- a/doc/NEWS.adoc +++ b/doc/NEWS.adoc @@ -25,11 +25,19 @@ High-level summary of changes (work in progress) - A C++11 compiler is now required to build ccache. -- There is a new -x/--show-compression option which shows some statistics about +- Added a `-x/--show-compression` option which shows some statistics about cache compression. -- A progress bar has been added to show the progress of time-consuming - options like -c/--cleanup, -C/--clear and -x/--show-compression. +- Added a `-X/--recompress` option which can be used to recompress the cache + data with another compression level (or make it uncompressed). If you choose + to disable compression by default or to use a level with a low compression + ratio, you can recompress the cache with a higher compression level after the + build or at another time when there are more CPU cycles available, for + instance every night. + +- A progress bar has been added to show the progress of time-consuming options + like `-c/--cleanup`, `-C/--clear`, `-x/--show-compression` and + `-X/--recompress`. - (More to be written.) diff --git a/src/CacheEntryReader.hpp b/src/CacheEntryReader.hpp index 40a377c0c..e8f3f2bf1 100644 --- a/src/CacheEntryReader.hpp +++ b/src/CacheEntryReader.hpp @@ -74,13 +74,25 @@ public: // Get size of the payload, uint64_t payload_size() const; + // Get content magic. + const uint8_t* magic() const; + + // Get content version. + uint8_t version() const; + + // Get compression type. + Compression::Type compression_type() const; + + // Get compression level. + uint64_t compression_level() const; + // Get size of the content (header + payload + checksum). 
uint64_t content_size() const; private: std::unique_ptr m_decompressor; Checksum m_checksum; - char m_magic[4]; + uint8_t m_magic[4]; uint8_t m_version; Compression::Type m_compression_type; int8_t m_compression_level; @@ -96,6 +108,30 @@ CacheEntryReader::read(T& value) Util::big_endian_to_int(buffer, value); } +inline const uint8_t* +CacheEntryReader::magic() const +{ + return m_magic; +} + +inline uint8_t +CacheEntryReader::version() const +{ + return m_version; +} + +inline Compression::Type +CacheEntryReader::compression_type() const +{ + return m_compression_type; +} + +inline uint64_t +CacheEntryReader::compression_level() const +{ + return m_compression_level; +} + inline uint64_t CacheEntryReader::payload_size() const { diff --git a/src/Checksum.hpp b/src/Checksum.hpp index 7033b87cc..adc0b0bad 100644 --- a/src/Checksum.hpp +++ b/src/Checksum.hpp @@ -18,7 +18,8 @@ #pragma once -#include +#include "system.hpp" + #include class Checksum diff --git a/src/ccache.cpp b/src/ccache.cpp index 8c8c3dbc0..b78d4a3d2 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -91,6 +91,8 @@ static const char USAGE_TEXT[] = "(decimal)\n" " and Ki, Mi, Gi, Ti (binary); default " "suffix: G\n" + " -X, --recompress LEVEL recompress the cache (integer level or" + " \"uncompressed\")\n" " -x, --show-compression show compression statistics\n" " -p, --show-config show current configuration options in\n" " human-readable format\n" @@ -3941,6 +3943,7 @@ ccache_main_options(int argc, char* argv[]) {"max-files", required_argument, 0, 'F'}, {"max-size", required_argument, 0, 'M'}, {"print-stats", no_argument, 0, PRINT_STATS}, + {"recompress", required_argument, 0, 'X'}, {"set-config", required_argument, 0, 'o'}, {"show-compression", no_argument, 0, 'x'}, {"show-config", no_argument, 0, 'p'}, @@ -3950,7 +3953,7 @@ ccache_main_options(int argc, char* argv[]) {0, 0, 0, 0}}; int c; - while ((c = getopt_long(argc, argv, "cCk:hF:M:po:sVxz", options, NULL)) + while ((c = getopt_long(argc, argv, "cCk:hF:M:po:sVxX:z", options, NULL)) != -1) { switch (c) { case DUMP_MANIFEST: @@ -4079,6 +4082,29 @@ ccache_main_options(int argc, char* argv[]) break; } + case 'X': // --recompress + { + initialize(); + int level; + if (std::string(optarg) == "uncompressed") { + level = 0; + } else { + level = Util::parse_int(optarg); + if (level < -128 || level > 127) { + throw Error("compression level must be between -128 and 127"); + } + if (level == 0) { + level = g_config.compression_level(); + } + } + + ProgressBar progress_bar("Recompressing..."); + compress_recompress(g_config, level, [&](double progress) { + progress_bar.update(progress); + }); + break; + } + case 'z': // --zero-stats initialize(); stats_zero(); diff --git a/src/compress.cpp b/src/compress.cpp index d5c75abd8..dbf0169f1 100644 --- a/src/compress.cpp +++ b/src/compress.cpp @@ -18,32 +18,111 @@ #include "compress.hpp" +#include "AtomicFile.hpp" #include "CacheEntryReader.hpp" +#include "CacheEntryWriter.hpp" #include "File.hpp" +#include "StdMakeUnique.hpp" #include "ccache.hpp" #include "manifest.hpp" #include "result.hpp" +#include #include -static bool -get_content_size(const std::string& path, - const uint8_t magic[4], - uint8_t version, - uint64_t& size) +static File +open_file(const std::string& path, const char* mode) { - File f(path, "rb"); + File f(path, mode); if (!f) { - cc_log("Failed to open %s for reading: %s", path.c_str(), strerror(errno)); - return false; + throw Error( + fmt::format("failed to open {} for reading: {}", path, strerror(errno))); } + 
return f; +} + +static std::unique_ptr<CacheEntryReader> +create_reader(const CacheFile& cache_file, FILE* stream) +{ + if (cache_file.type() == CacheFile::Type::unknown) { + throw Error(fmt::format("unknown file type for {}", cache_file.path())); + } + + switch (cache_file.type()) { + case CacheFile::Type::result: + return std::make_unique<CacheEntryReader>( + stream, k_result_magic, k_result_version); + + case CacheFile::Type::manifest: + return std::make_unique<CacheEntryReader>( + stream, k_manifest_magic, k_manifest_version); + + case CacheFile::Type::unknown: + assert(false); // Handled at function entry. + return {}; + } + + assert(false); + return {}; +} + +static std::unique_ptr<CacheEntryWriter> +create_writer(FILE* stream, + const CacheEntryReader& reader, + Compression::Type compression_type, + int8_t compression_level) +{ + return std::make_unique<CacheEntryWriter>(stream, + reader.magic(), + reader.version(), + compression_type, + compression_level, + reader.payload_size()); +} - try { - size = CacheEntryReader(f.get(), magic, version).content_size(); - return true; - } catch (const Error&) { - return false; +static void +recompress_file(const std::string& stats_file, + const CacheFile& cache_file, + int8_t level) +{ + auto file = open_file(cache_file.path(), "rb"); + auto reader = create_reader(cache_file, file.get()); + + int8_t current_level = reader->compression_type() == Compression::Type::none + ? 0 + : reader->compression_level(); + if (current_level == level) { + return; + } + + AtomicFile atomic_new_file(cache_file.path(), AtomicFile::Mode::binary); + auto writer = create_writer(atomic_new_file.stream(), + *reader, + level == 0 ? Compression::Type::none + : Compression::Type::zstd, + level); + + char buffer[READ_BUFFER_SIZE]; + size_t bytes_left = reader->payload_size(); + while (bytes_left > 0) { + size_t bytes_to_read = std::min(bytes_left, sizeof(buffer)); + reader->read(buffer, bytes_to_read); + writer->write(buffer, bytes_to_read); + bytes_left -= bytes_to_read; } + reader->finalize(); + writer->finalize(); + + struct stat st; + x_stat(cache_file.path().c_str(), &st); + uint64_t old_size = file_size_on_disk(&st); + + atomic_new_file.commit(); + + x_stat(cache_file.path().c_str(), &st); + uint64_t new_size = file_size_on_disk(&st); + + stats_update_size(stats_file.c_str(), new_size - old_size, 0); } void @@ -66,27 +145,16 @@ compress_stats(const Config& config, files); for (size_t i = 0; i < files.size(); ++i) { - const auto& file = files[i]; - - on_disk_size += file_size_on_disk(&file->stat()); - - uint64_t content_size = 0; - bool is_compressible; - if (file->type() == CacheFile::Type::manifest) { - is_compressible = get_content_size( - file->path(), k_manifest_magic, k_manifest_version, content_size); - } else if (file->type() == CacheFile::Type::result) { - is_compressible = get_content_size( - file->path(), k_result_magic, k_result_version, content_size); - } else { - is_compressible = false; - } - - if (is_compressible) { - compr_size += file->stat().st_size; - compr_orig_size += content_size; - } else { - incompr_size += file->stat().st_size; + const auto& cache_file = files[i]; + on_disk_size += file_size_on_disk(&cache_file->stat()); + + try { + auto file = open_file(cache_file->path(), "rb"); + auto reader = create_reader(*cache_file, file.get()); + compr_size += cache_file->stat().st_size; + compr_orig_size += reader->content_size(); + } catch (Error&) { + incompr_size += cache_file->stat().st_size; } sub_progress_receiver(1.0 / 2 + 1.0 * i / files.size() / 2); @@ -124,3 +192,41 @@ compress_stats(const Config& config, free(cache_size_str);
free(on_disk_size_str); } + +void +compress_recompress(const Config& config, + int8_t level, + const Util::ProgressReceiver& progress_receiver) +{ + Util::for_each_level_1_subdir( + config.cache_dir(), + [&](const std::string& subdir, + const Util::ProgressReceiver& sub_progress_receiver) { + std::vector> files; + Util::get_level_1_files( + subdir, + [&](double progress) { sub_progress_receiver(progress / 2); }, + files); + + auto stats_file = subdir + "/stats"; + + for (size_t i = 0; i < files.size(); ++i) { + const auto& file = files[i]; + + if (file->type() != CacheFile::Type::unknown) { + try { + recompress_file(stats_file, *file, level); + } catch (Error&) { + // Ignore for now. + } + } + + sub_progress_receiver(1.0 / 2 + 1.0 * i / files.size() / 2); + } + }, + progress_receiver); + + if (isatty(STDOUT_FILENO)) { + printf("\n"); + } +} diff --git a/src/compress.hpp b/src/compress.hpp index 58f6e2b4d..96bbed020 100644 --- a/src/compress.hpp +++ b/src/compress.hpp @@ -23,3 +23,14 @@ void compress_stats(const Config& config, const Util::ProgressReceiver& progress_receiver); + +// Recompress the cache. +// +// Arguments: +// - config: The config. +// - level: Target compression level (positive or negative value), or 0 for no +// compression. +// - progress_receiver: Function that will be called for progress updates. +void compress_recompress(const Config& config, + int8_t level, + const Util::ProgressReceiver& progress_receiver); diff --git a/src/stats.cpp b/src/stats.cpp index b8dae83ad..3c317deb9 100644 --- a/src/stats.cpp +++ b/src/stats.cpp @@ -290,6 +290,10 @@ stats_collect(struct counters* counters, time_t* last_updated) void stats_update_size(const char* sfile, int64_t size, int files) { + if (size == 0 && files == 0) { + return; + } + struct counters* updates; if (sfile == stats_file) { init_counter_updates();
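The new CacheEntryReader accessors (magic(), version(), compression_type() and compression_level()) exist so that recompress_file() can compare an entry's current header against the requested target before rewriting it. As a sketch of that check in isolation — with a hypothetical helper name and an assumed Compression.hpp header path, neither of which is part of the patch:

-------------------------------------------------------------------------------
// Hypothetical helper (not part of this patch): restates the level check done
// inline in recompress_file() using the new CacheEntryReader accessors.
// An entry stored with Compression::Type::none is treated as level 0, which is
// the same meaning level 0 has elsewhere in the patch.
#include "CacheEntryReader.hpp"
#include "Compression.hpp" // assumed header for Compression::Type

static bool
needs_recompression(const CacheEntryReader& reader, int8_t target_level)
{
  const int8_t current_level =
    reader.compression_type() == Compression::Type::none
      ? 0
      : static_cast<int8_t>(reader.compression_level());
  return current_level != target_level;
}
-------------------------------------------------------------------------------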
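The compress_recompress() entry point declared in compress.hpp is driven the same way the case 'X': handler in ccache.cpp does it: choose a target level, then pass a callback that feeds a progress bar. The sketch below restates that wiring under the assumption that Config and ProgressBar live in headers named Config.hpp and ProgressBar.hpp; those paths are not defined by this diff.

-------------------------------------------------------------------------------
// Sketch of calling the new API, mirroring the `case 'X'` handler added to
// ccache.cpp in this patch. The Config.hpp and ProgressBar.hpp paths are
// assumptions, not taken from the diff.
#include "Config.hpp"
#include "ProgressBar.hpp"
#include "compress.hpp"

// Recompress every cache entry to `level`; level 0 stores entries
// uncompressed, matching the compress.hpp contract above.
static void
recompress_whole_cache(const Config& config, int8_t level)
{
  ProgressBar progress_bar("Recompressing...");
  compress_recompress(
    config, level, [&](double progress) { progress_bar.update(progress); });
}
-------------------------------------------------------------------------------

From the command line the same code path is reached with, for example, `ccache -X 19` or `ccache --recompress uncompressed`.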