From: Anders Björklund Date: Mon, 15 Jul 2019 13:28:26 +0000 (+0200) Subject: Add command to show compression statistics (#440) X-Git-Tag: v4.0~903 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=26fbfd4f3fcd25c100b1b2e0af66bb422b44a993;p=thirdparty%2Fccache.git Add command to show compression statistics (#440) This will only show information about the files that it knows about (those with the right magic bytes). So the file count might differ from what is shown with the regular statistics (which shows all files, including old ones). The terminology used here is a bit confusing: the compression ratio is supposed to grow upwards. It is sometimes known as "space savings" instead, so list both values (ratio and savings) to make the output more obvious. --- diff --git a/Makefile.in b/Makefile.in index 7120b3008..85630acf8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -37,6 +37,7 @@ non_3pp_sources = \ src/compopt.c \ src/compr_none.c \ src/compr_zstd.c \ + src/compress.c \ src/compression.c \ src/conf.c \ src/confitems.c \ diff --git a/src/ccache.c b/src/ccache.c index 8f00df7f4..7bd1432df 100644 --- a/src/ccache.c +++ b/src/ccache.c @@ -75,6 +75,7 @@ static const char USAGE_TEXT[] = " human-readable format\n" " -s, --show-stats show summary of configuration and statistics\n" " counters in human-readable format\n" + " -x, --show-compression show compression statistics\n" " -z, --zero-stats zero statistics counters\n" "\n" " -h, --help print this help text\n" @@ -3832,26 +3833,27 @@ ccache_main_options(int argc, char *argv[]) PRINT_STATS, }; static const struct option options[] = { - {"cleanup", no_argument, 0, 'c'}, - {"clear", no_argument, 0, 'C'}, - {"dump-manifest", required_argument, 0, DUMP_MANIFEST}, - {"dump-result", required_argument, 0, DUMP_RESULT}, - {"get-config", required_argument, 0, 'k'}, - {"hash-file", required_argument, 0, HASH_FILE}, - {"help", no_argument, 0, 'h'}, - {"max-files", required_argument, 0, 'F'}, - {"max-size", required_argument, 0, 
'M'}, - {"print-stats", no_argument, 0, PRINT_STATS}, - {"set-config", required_argument, 0, 'o'}, - {"show-config", no_argument, 0, 'p'}, - {"show-stats", no_argument, 0, 's'}, - {"version", no_argument, 0, 'V'}, - {"zero-stats", no_argument, 0, 'z'}, + {"cleanup", no_argument, 0, 'c'}, + {"clear", no_argument, 0, 'C'}, + {"dump-manifest", required_argument, 0, DUMP_MANIFEST}, + {"dump-result", required_argument, 0, DUMP_RESULT}, + {"get-config", required_argument, 0, 'k'}, + {"hash-file", required_argument, 0, HASH_FILE}, + {"help", no_argument, 0, 'h'}, + {"max-files", required_argument, 0, 'F'}, + {"max-size", required_argument, 0, 'M'}, + {"print-stats", no_argument, 0, PRINT_STATS}, + {"set-config", required_argument, 0, 'o'}, + {"show-config", no_argument, 0, 'p'}, + {"show-compression", no_argument,0, 'x'}, + {"show-stats", no_argument, 0, 's'}, + {"version", no_argument, 0, 'V'}, + {"zero-stats", no_argument, 0, 'z'}, {0, 0, 0, 0} }; int c; - while ((c = getopt_long(argc, argv, "cCk:hF:M:po:sVz", options, NULL)) + while ((c = getopt_long(argc, argv, "cCk:hF:M:po:sVxz", options, NULL)) != -1) { switch (c) { case DUMP_MANIFEST: @@ -3985,6 +3987,11 @@ ccache_main_options(int argc, char *argv[]) fprintf(stdout, VERSION_TEXT, CCACHE_VERSION); x_exit(0); + case 'x': // --show-compression + initialize(); + compress_stats(conf); + break; + case 'z': // --zero-stats initialize(); stats_zero(); diff --git a/src/ccache.h b/src/ccache.h index 860f5ea8c..7ae214e7d 100644 --- a/src/ccache.h +++ b/src/ccache.h @@ -247,6 +247,11 @@ void clean_up_dir(struct conf *conf, const char *dir, double limit_multiple); void clean_up_all(struct conf *conf); void wipe_all(struct conf *conf); +// ---------------------------------------------------------------------------- +// compress.c + +void compress_stats(struct conf *conf); + // ---------------------------------------------------------------------------- // execute.c diff --git a/src/common_header.c b/src/common_header.c index 
e5908944e..f91ffbac1 100644
--- a/src/common_header.c
+++ b/src/common_header.c
@@ -146,6 +146,14 @@ bool common_header_initialize_for_reading(
 	return true;
 }
 
+// Return the content_size field of header (callers use it as the uncompressed
+// size) and set *is_compressed when compression_type is not COMPR_TYPE_NONE.
+size_t common_header_size(const struct common_header *header, bool *is_compressed)
+{
+	*is_compressed = header->compression_type != COMPR_TYPE_NONE;
+	return header->content_size;
+}
+
 void common_header_dump(const struct common_header *header, FILE *f)
 {
 	fprintf(
diff --git a/src/common_header.h b/src/common_header.h
index f4d23d25f..76179ded1 100644
--- a/src/common_header.h
+++ b/src/common_header.h
@@ -61,6 +61,8 @@ bool common_header_initialize_for_reading(
 	XXH64_state_t *checksum,
 	char **errmsg);
 
+size_t common_header_size(const struct common_header *header, bool *is_compressed);
+
 void common_header_dump(const struct common_header *header, FILE *f);
 
 #endif
diff --git a/src/compress.c b/src/compress.c
new file mode 100644
index 000000000..99685f10b
--- /dev/null
+++ b/src/compress.c
@@ -0,0 +1,116 @@
+// Copyright (C) 2002-2006 Andrew Tridgell
+// Copyright (C) 2009-2018 Joel Rosdahl
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "ccache.h"
+
+#include "manifest.h"
+#include "result.h"
+
+static unsigned num_files;
+static unsigned comp_files;
+
+static uint64_t cache_size;
+static uint64_t real_size;
+
+// Callback for traverse(): account for one cache entry in the file-scope
+// counters. Only regular files named "*.manifest" or "*.result" for which a
+// non-zero content size could be read from the header are counted. Files named
+// "stats", names starting with ".nfs", names containing "CACHEDIR.TAG" and
+// all other files are ignored.
+static void
+measure_fn(const char *fname, struct stat *st)
+{
+	if (!S_ISREG(st->st_mode)) {
+		return;
+	}
+
+	char *p = basename(fname);
+	if (str_eq(p, "stats")) {
+		goto out;
+	}
+
+	if (str_startswith(p, ".nfs")) {
+		// Ignore temporary NFS files that may be left for open but deleted files.
+		goto out;
+	}
+
+	if (strstr(p, "CACHEDIR.TAG")) {
+		goto out;
+	}
+
+	size_t uncompressed_size;
+	bool is_compressed;
+	if (str_endswith(p, ".manifest")) {
+		uncompressed_size = manifest_size(fname, &is_compressed);
+	} else if (str_endswith(p, ".result")) {
+		uncompressed_size = result_size(fname, &is_compressed);
+	} else {
+		uncompressed_size = 0;
+		is_compressed = false;
+	}
+
+	// Ignore unknown files in the cache, including any files from older versions.
+	if (uncompressed_size > 0) {
+		cache_size += st->st_size;
+		num_files++;
+		if (is_compressed) {
+			// Compressed entry: use the content size read from its header.
+			real_size += uncompressed_size;
+			comp_files++;
+		} else {
+			// Uncompressed entry: the on-disk size is used as the real size.
+			real_size += st->st_size;
+		}
+	}
+
+out:
+	free(p);
+}
+
+// Walk the 16 top-level cache subdirectories (0-f), accumulate the size
+// counters via measure_fn() and print a compression summary to stdout.
+void compress_stats(struct conf *conf)
+{
+	num_files = 0;
+	comp_files = 0;
+	cache_size = 0;
+	real_size = 0;
+
+	for (int i = 0; i <= 0xF; i++) {
+		char *dname = format("%s/%1x", conf->cache_dir, i);
+		traverse(dname, measure_fn);
+		free(dname);
+	}
+
+	char *cache_str = format_human_readable_size(cache_size);
+	printf("Compressed size: %s, %.0f files\n",
+	       cache_str, (double)comp_files);
+	free(cache_str);
+	char *real_str = format_human_readable_size(real_size);
+	printf("Uncompressed size: %s, %.0f files\n",
+	       real_str, (double)num_files);
+	free(real_str);
+
+	// Share of counted files that are compressed; guard on the divisor itself.
+	double percent = num_files > 0 ? (100.0 * comp_files) / num_files : 0.0;
+	printf("Compressed files: %.2f %%\n", percent);
+	// ratio = real size / on-disk size; savings = 100 * (1 - 1 / ratio).
+	double ratio = cache_size > 0 ? ((double) real_size) / cache_size : 0.0;
+	double savings = ratio > 0.0 ? 100.0 - (100.0 / ratio) : 0.0;
+	printf("Compression ratio: %.2f %% (%.1fx)\n", savings, ratio);
+}
+
diff --git a/src/manifest.c b/src/manifest.c
index d43dea632..c1e5062f5 100644
--- a/src/manifest.c
+++ b/src/manifest.c
@@ -782,6 +782,45 @@ out:
 	return ret;
 }
 
+// Read the common header of the manifest file at path and return its content
+// size, storing in *is_compressed whether the content is compressed. Returns 0
+// (with *is_compressed set to false) if the file cannot be opened or its
+// header cannot be read.
+size_t
+manifest_size(const char *path, bool *is_compressed)
+{
+	char *errmsg = NULL;
+	size_t size = 0;
+	*is_compressed = false;
+	FILE *f = fopen(path, "rb");
+	if (!f) {
+		cc_log("Failed to open %s for reading: %s", path, strerror(errno));
+		goto error;
+	}
+	struct common_header header;
+	if (!common_header_initialize_for_reading(
+		    &header,
+		    f,
+		    MANIFEST_MAGIC,
+		    MANIFEST_VERSION,
+		    NULL,
+		    NULL,
+		    NULL,
+		    &errmsg)) {
+		cc_log("Error: %s", errmsg);
+		goto error;
+	}
+	size = common_header_size(&header, is_compressed);
+error:
+	// NOTE(review): errmsg is assumed to be heap-allocated by
+	// common_header_initialize_for_reading() - confirm. free(NULL) is a no-op.
+	free(errmsg);
+	if (f) {
+		fclose(f);
+	}
+	return size;
+}
+
 bool
 manifest_dump(const char *manifest_path, FILE *stream)
 {
diff --git a/src/manifest.h b/src/manifest.h
index c0e283d05..eca033948 100644
--- a/src/manifest.h
+++ b/src/manifest.h
@@ -11,6 +11,7 @@ extern const char MANIFEST_MAGIC[4];
 struct digest *manifest_get(struct conf *conf, const char *manifest_path);
 bool manifest_put(const char *manifest_path, struct digest
*result_digest,
                   struct hashtable *included_files);
+size_t manifest_size(const char *manifest_path, bool *is_compressed);
 bool manifest_dump(const char *manifest_path, FILE *stream);
 
 #endif
diff --git a/src/result.c b/src/result.c
index 81e18ef7e..e72685d00 100644
--- a/src/result.c
+++ b/src/result.c
@@ -710,6 +710,46 @@ out:
 	return ret;
 }
 
+
+// Read the common header of the result file at path and return its content
+// size, storing in *is_compressed whether the content is compressed. Returns 0
+// (with *is_compressed set to false) if the file cannot be opened or its
+// header cannot be read.
+size_t
+result_size(const char *path, bool *is_compressed)
+{
+	size_t size = 0;
+	char *errmsg = NULL;
+	*is_compressed = false;
+	FILE *f = fopen(path, "rb");
+	if (!f) {
+		cc_log("Failed to open %s for reading: %s", path, strerror(errno));
+		goto error;
+	}
+	struct common_header header;
+	if (!common_header_initialize_for_reading(
+		    &header,
+		    f,
+		    RESULT_MAGIC,
+		    RESULT_VERSION,
+		    NULL,
+		    NULL,
+		    NULL,
+		    &errmsg)) {
+		cc_log("Error: %s", errmsg);
+		goto error;
+	}
+	size = common_header_size(&header, is_compressed);
+error:
+	// NOTE(review): errmsg is assumed to be heap-allocated by
+	// common_header_initialize_for_reading() - confirm. free(NULL) is a no-op.
+	free(errmsg);
+	if (f) {
+		fclose(f);
+	}
+	return size;
+}
+
 bool
 result_dump(const char *path, FILE *stream)
 {
diff --git a/src/result.h b/src/result.h
index 71f8ed512..ca9a08924 100644
--- a/src/result.h
+++ b/src/result.h
@@ -16,6 +16,7 @@ void result_files_free(struct result_files *c);
 
 bool result_get(const char *path, struct result_files *list);
 bool result_put(const char *path, struct result_files *list);
+size_t result_size(const char *path, bool *is_compressed);
 bool result_dump(const char *path, FILE *stream);
 
 #endif