From: Joel Rosdahl Date: Sun, 30 Jun 2019 12:01:38 +0000 (+0200) Subject: Add checksumming of cached content X-Git-Tag: v4.0~922 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=42faf47f45505fc51105cef583b460929053974d;p=thirdparty%2Fccache.git Add checksumming of cached content Both compressed and uncompressed content are checksummed and verified. The chosen checksum algorithm is XXH64, which is the same that the zstd frame format uses (but ccache stores all 64 bits instead of only 32, because why not?). --- diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 572599097..2755275c7 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -44,6 +44,7 @@ Features * Easy installation. * Low overhead. * Compresses data in the cache to save disk space. +* Checksums data in the cache to detect corruption. Limitations diff --git a/src/compr_none.c b/src/compr_none.c index 614a73c88..ea312a02a 100644 --- a/src/compr_none.c +++ b/src/compr_none.c @@ -16,11 +16,19 @@ #include "compression.h" +struct state { + FILE *output; + XXH64_state_t *checksum; +}; + static struct compr_state * -compr_none_init(FILE *output, int8_t level) +compr_none_init(FILE *output, int8_t level, XXH64_state_t *checksum) { + struct state *state = malloc(sizeof(struct state)); + state->output = output; + state->checksum = checksum; (void)level; - return (struct compr_state *)output; + return (struct compr_state *)state; } static int8_t @@ -33,15 +41,21 @@ compr_none_get_actual_compression_level(struct compr_state *handle) static bool compr_none_write(struct compr_state *handle, const void *data, size_t size) { - FILE *output = (FILE *)handle; - return fwrite(data, 1, size, output) == size; + struct state *state = (struct state *)handle; + size_t ret = fwrite(data, size, 1, state->output); + if (state->checksum) { + XXH64_update(state->checksum, data, size); + } + return ret == 1; } static bool compr_none_free(struct compr_state *handle) { - FILE *output = (FILE *)handle; - return ferror(output) == 0; + struct state *state = (struct state *)handle; + bool result = ferror(state->output) == 0; + free(state); + return result; } struct compressor compressor_none_impl = { diff --git a/src/compr_zstd.c b/src/compr_zstd.c index 1d5b3ccfd..3dc3944f9 100644 --- a/src/compr_zstd.c +++ b/src/compr_zstd.c @@ -23,6 +23,7 @@ struct state { FILE *output; + XXH64_state_t *checksum; ZSTD_CStream *stream; ZSTD_inBuffer in; ZSTD_outBuffer out; @@ -31,10 +32,11 @@ struct state { }; static struct compr_state * -compr_zstd_init(FILE *output, int8_t level) +compr_zstd_init(FILE *output, int8_t level, XXH64_state_t *checksum) { struct state *state = malloc(sizeof(struct state)); state->output = output; + state->checksum = checksum; state->stream = ZSTD_createCStream(); state->failed = false; @@ -86,6 +88,10 @@ compr_zstd_write(struct compr_state *handle, const void *data, size_t size) } struct state *state = (struct state *)handle; + if (state->checksum) { + XXH64_update(state->checksum, data, size); + } + state->in.src = data; state->in.size = size; state->in.pos = 0; @@ -131,6 +137,7 @@ compr_zstd_free(struct compr_state *handle) if (!handle) { return false; } + struct state *state = (struct state *)handle; compr_zstd_write(handle, NULL, 0); diff --git a/src/compression.h b/src/compression.h index 089824f01..2101be2f2 100644 --- a/src/compression.h +++ b/src/compression.h @@ -2,6 +2,7 @@ #define COMPRESSION_H #include "system.h" +#include "xxhash.h" struct compr_state; @@ -10,7 +11,11 @@ struct compressor { // // output: The file to write compressed data to. // compression_level: Desired compression level. - struct compr_state *(*init)(FILE *output, int8_t compression_level); + // checksum: Checksum state to update (NULL for no checksum). + struct compr_state *(*init)( + FILE *output, + int8_t compression_level, + XXH64_state_t *checksum); // Get the actual compression level that will be used. int8_t (*get_actual_compression_level)(struct compr_state *state); @@ -36,7 +41,8 @@ struct decompressor { // Create and initialize a decompressor. // // input: The file to read compressed data from. - struct decompr_state *(*init)(FILE *input); + // checksum: Checksum state to update (NULL for no checksum). + struct decompr_state *(*init)(FILE *input, XXH64_state_t *checksum); // Decompress data. // diff --git a/src/decompr_none.c b/src/decompr_none.c index 65c8abe68..51b1a2581 100644 --- a/src/decompr_none.c +++ b/src/decompr_none.c @@ -16,24 +16,44 @@ #include "compression.h" +struct state { + FILE *input; + XXH64_state_t *checksum; + bool failed; +}; + static struct decompr_state * -decompr_none_init(FILE *input) +decompr_none_init(FILE *input, XXH64_state_t *checksum) { - return (struct decompr_state *)input; + struct state *state = malloc(sizeof(struct state)); + state->input = input; + state->checksum = checksum; + state->failed = false; + return (struct decompr_state *)state; } static bool decompr_none_read(struct decompr_state *handle, void *data, size_t size) { - FILE *input = (FILE *)handle; - return fread(data, 1, size, input) == size; + struct state *state = (struct state *)handle; + + bool result = fread(data, 1, size, state->input) == size; + if (result && state->checksum) { + XXH64_update(state->checksum, data, size); + } + if (!result) { + state->failed = true; + } + return result; } static bool decompr_none_free(struct decompr_state *handle) { - FILE *input = (FILE *)handle; - return ferror(input) == 0; + struct state *state = (struct state *)handle; + bool result = !state->failed; + free(state); + return result; } struct decompressor decompressor_none_impl = { diff --git a/src/decompr_zstd.c b/src/decompr_zstd.c index ae9a637f3..0fb2d9ec3 100644 --- a/src/decompr_zstd.c +++ b/src/decompr_zstd.c @@ -27,6 +27,7 @@ enum stream_state { struct state { FILE *input; + XXH64_state_t *checksum; char input_buffer[READ_BUFFER_SIZE]; size_t input_size; size_t input_consumed; @@ -37,11 +38,12 @@ struct state { }; static struct decompr_state * -decompr_zstd_init(FILE *input) +decompr_zstd_init(FILE *input, XXH64_state_t *checksum) { struct state *state = malloc(sizeof(struct state)); state->input = input; + state->checksum = checksum; state->input_size = 0; state->input_consumed = 0; state->stream = ZSTD_createDStream(); @@ -88,7 +90,11 @@ decompr_zstd_read(struct decompr_state *handle, void *data, size_t size) if (ZSTD_isError(ret)) { state->stream_state = STREAM_STATE_FAILED; return false; - } else if (ret == 0) { + } + if (state->checksum) { + XXH64_update(state->checksum, state->out.dst, state->out.pos); + } + if (ret == 0) { state->stream_state = STREAM_STATE_END; break; } diff --git a/src/manifest.c b/src/manifest.c index 08c36d61c..1011b0ddf 100644 --- a/src/manifest.c +++ b/src/manifest.c @@ -23,9 +23,12 @@ #include "manifest.h" #include "xxhash.h" -// Manifest data format (big-endian integers): +// Manifest data format +// ==================== // -// ::=
+// Integers are big-endian. +// +// ::=
::= // // ::= 4 bytes ("cCrS") @@ -56,6 +59,8 @@ // ::= uint32_t // ::= uint32_t // ::= DIGEST_SIZE bytes +// ::= +// ::= uint64_t ; XXH64 of content bytes // // Sketch of concrete layout: @@ -84,8 +89,11 @@ // ... // DIGEST_SIZE bytes // ... +// checksum 8 bytes +// // -// Version history: +// Version history +// =============== // // 1: Introduced in ccache 3.0. (Files are always compressed with gzip.) // 2: Introduced in ccache 3.8. @@ -245,6 +253,7 @@ read_manifest(const char *path, char **errmsg) struct decompressor *decompressor = NULL; struct decompr_state *decompr_state = NULL; *errmsg = NULL; + XXH64_state_t *checksum = NULL; FILE *f = fopen(path, "rb"); if (!f) { @@ -289,7 +298,11 @@ read_manifest(const char *path, char **errmsg) goto out; } - decompr_state = decompressor->init(f); + checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + XXH64_update(checksum, header_bytes, sizeof(header_bytes)); + + decompr_state = decompressor->init(f, checksum); if (!decompr_state) { *errmsg = x_strdup("Failed to initialize decompressor"); goto out; @@ -324,7 +337,17 @@ read_manifest(const char *path, char **errmsg) READ_BYTES(mf->results[i].name.bytes, DIGEST_SIZE); } - success = true; + uint64_t actual_checksum = XXH64_digest(checksum); + uint64_t expected_checksum; + READ_UINT64(expected_checksum); + if (actual_checksum == expected_checksum) { + success = true; + } else { + *errmsg = format( + "Incorrect checksum (actual %016llx, expected %016llx)", + (unsigned long long)actual_checksum, + (unsigned long long)expected_checksum); + } out: if (decompressor && !decompressor->free(decompr_state)) { @@ -333,6 +356,9 @@ out: if (f) { fclose(f); } + if (checksum) { + XXH64_freeState(checksum); + } if (!success) { if (!*errmsg) { *errmsg = x_strdup("Corrupt manifest file"); @@ -346,7 +372,7 @@ out: #define WRITE_BYTES(buf, length) \ do { \ if (!compressor->write(compr_state, buf, length)) { \ - goto error; \ + goto out; \ } \ } while (false) @@ -381,6 +407,9 @@ out: static bool write_manifest(FILE *f, const struct manifest *mf) { + int ret = false; + XXH64_state_t *checksum = NULL; + uint64_t content_size = COMMON_HEADER_SIZE; content_size += 4; // n_files for (size_t i = 0; i < mf->n_files; i++) { @@ -394,18 +423,22 @@ write_manifest(FILE *f, const struct manifest *mf) content_size += mf->results[i].n_file_info_indexes * 4; content_size += DIGEST_SIZE; } + content_size += 8; // checksum struct common_header header; common_header_from_config(&header, MAGIC, MANIFEST_VERSION, content_size); + checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + struct compressor *compressor = compressor_from_type(header.compression_type); assert(compressor); struct compr_state *compr_state = - compressor->init(f, header.compression_level); + compressor->init(f, header.compression_level, checksum); if (!compr_state) { cc_log("Failed to initialize compressor"); - goto error; + goto out; } header.compression_level = compressor->get_actual_compression_level(compr_state); @@ -413,8 +446,9 @@ write_manifest(FILE *f, const struct manifest *mf) uint8_t header_bytes[COMMON_HEADER_SIZE]; common_header_to_bytes(&header, header_bytes); if (fwrite(header_bytes, sizeof(header_bytes), 1, f) != 1) { - goto error; + goto out; } + XXH64_update(checksum, header_bytes, sizeof(header_bytes)); WRITE_UINT32(mf->n_files); for (uint32_t i = 0; i < mf->n_files; i++) { @@ -440,11 +474,16 @@ write_manifest(FILE *f, const struct manifest *mf) WRITE_BYTES(mf->results[i].name.bytes, DIGEST_SIZE); } - return compressor->free(compr_state); + WRITE_UINT64(XXH64_digest(checksum)); -error: - cc_log("Error writing to manifest file"); - return false; + ret = compressor->free(compr_state); + +out: + XXH64_freeState(checksum); + if (!ret) { + cc_log("Error writing to manifest file"); + } + return ret; } static bool diff --git a/src/result.c b/src/result.c index ec4096ed5..23537602e 100644 --- a/src/result.c +++ b/src/result.c @@ -18,11 +18,15 @@ #include "common_header.h" #include "int_bytes_conversion.h" #include "compression.h" +#include "xxhash.h" #include "result.h" -// Result data format (big-endian integers): +// Result data format +// ================== // -// ::=
+// Integers are big-endian. +// +// ::=
//
::= // ::= 4 bytes ("cCrS") // ::= uint8_t @@ -44,6 +48,8 @@ // ::= 1 (uint8_t) // ::= uint8_t // ::= key_len bytes +// ::= +// ::= uint64_t ; XXH64 of content bytes // // Sketch of concrete layout: // @@ -64,8 +70,11 @@ // 1 byte // key_len bytes // ... +// checksum 8 bytes +// // -// Version history: +// Version history +// =============== // // 1: Introduced in ccache 3.8. @@ -154,6 +163,7 @@ read_result( struct decompressor *decompressor = NULL; struct decompr_state *decompr_state = NULL; FILE *subfile = NULL; + XXH64_state_t *checksum = NULL; FILE *f = fopen(path, "rb"); if (!f) { @@ -168,6 +178,10 @@ read_result( goto out; } + checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + XXH64_update(checksum, header_bytes, sizeof(header_bytes)); + struct common_header header; common_header_from_bytes(&header, header_bytes); @@ -200,7 +214,7 @@ read_result( goto out; } - decompr_state = decompressor->init(f); + decompr_state = decompressor->init(f, checksum); if (!decompr_state) { *errmsg = x_strdup("Failed to initialize decompressor"); goto out; @@ -283,10 +297,22 @@ read_result( } } - if (i == n_entries) { + if (i != n_entries) { + *errmsg = format("Too few entries (read %u, expected %u)", i, n_entries); + goto out; + } + + uint64_t actual_checksum = XXH64_digest(checksum); + uint8_t expected_checksum_bytes[8]; + READ_BYTES(expected_checksum_bytes, sizeof(expected_checksum_bytes)); + uint64_t expected_checksum = UINT64_FROM_BYTES(expected_checksum_bytes); + if (actual_checksum == expected_checksum) { success = true; } else { - *errmsg = format("Too few entries (read %u, expected %u)", i, n_entries); + *errmsg = format( + "Incorrect checksum (actual %016llx, expected %016llx)", + (unsigned long long)actual_checksum, + (unsigned long long)expected_checksum); } out: @@ -299,6 +325,9 @@ out: if (f) { fclose(f); } + if (checksum) { + XXH64_freeState(checksum); + } if (!success && !*errmsg) { *errmsg = x_strdup("Corrupt result file"); } @@ -332,7 +361,8 @@ static bool write_result( const struct result_files *list, struct compressor *compressor, - struct compr_state *compr_state) + struct compr_state *compr_state, + XXH64_state_t *checksum) { WRITE_BYTE(list->n_files); @@ -360,6 +390,8 @@ write_result( fclose(f); } + WRITE_INT(8, XXH64_digest(checksum)); + return true; error: @@ -387,6 +419,8 @@ bool result_get(const char *path, struct result_files *list) bool result_put(const char *path, struct result_files *list) { bool ret = false; + XXH64_state_t *checksum = NULL; + char *tmp_file = format("%s.tmp", path); int fd = create_tmp_fd(&tmp_file); FILE *f = fdopen(fd, "wb"); @@ -404,15 +438,19 @@ bool result_put(const char *path, struct result_files *list) content_size += 8; // data_len content_size += list->files[i].size; // data } + content_size += 8; // checksum struct common_header header; common_header_from_config(&header, MAGIC, RESULT_VERSION, content_size); + checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + struct compressor *compressor = compressor_from_type(header.compression_type); assert(compressor); struct compr_state *compr_state = - compressor->init(f, header.compression_level); + compressor->init(f, header.compression_level, checksum); if (!compr_state) { cc_log("Failed to initialize compressor"); goto out; @@ -426,8 +464,9 @@ bool result_put(const char *path, struct result_files *list) cc_log("Failed to write result file header to %s", tmp_file); goto out; } + XXH64_update(checksum, header_bytes, sizeof(header_bytes)); - bool ok = write_result(list, compressor, compr_state) + bool ok = write_result(list, compressor, compr_state, checksum) && compressor->free(compr_state); if (!ok) { cc_log("Failed to write result file"); @@ -447,6 +486,9 @@ out: if (f) { fclose(f); } + if (checksum) { + XXH64_freeState(checksum); + } return ret; } diff --git a/unittest/test_compr_none.c b/unittest/test_compr_none.c new file mode 100644 index 000000000..85fc76fce --- /dev/null +++ b/unittest/test_compr_none.c @@ -0,0 +1,66 @@ +// Copyright (C) 2019 Joel Rosdahl +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "../src/compression.h" +#include "framework.h" +#include "util.h" + +TEST_SUITE(compr_type_none) + +TEST(small_roundtrip) +{ + const uint64_t expected_foobar_checksum = 0xa2aa05ed9085aaf9ULL; + + XXH64_state_t *checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + + FILE *f = fopen("data.uncompressed", "w"); + struct compressor *compr_none = compressor_from_type(COMPR_TYPE_NONE); + struct compr_state *c_state = compr_none->init(f, -1, checksum); + CHECK(c_state); + + CHECK(compr_none->write(c_state, "foobar", 6)); + + CHECK(compr_none->free(c_state)); + fclose(f); + + CHECK_INT_EQ(XXH64_digest(checksum), expected_foobar_checksum); + + XXH64_reset(checksum, 0); + f = fopen("data.uncompressed", "r"); + struct decompressor *decompr_none = decompressor_from_type(COMPR_TYPE_NONE); + struct decompr_state *d_state = decompr_none->init(f, checksum); + CHECK(d_state); + + char buffer[4]; + CHECK(decompr_none->read(d_state, buffer, 4)); + CHECK(memcmp(buffer, "foob", 4) == 0); + CHECK(decompr_none->read(d_state, buffer, 2)); + CHECK(memcmp(buffer, "ar", 2) == 0); + + // Nothing left to read. + CHECK(!decompr_none->read(d_state, buffer, 1)); + + // Error state is remembered. + CHECK(!decompr_none->free(d_state)); + fclose(f); + + CHECK_INT_EQ(XXH64_digest(checksum), expected_foobar_checksum); + + XXH64_freeState(checksum); +} + +TEST_SUITE_END diff --git a/unittest/test_compr_zstd.c b/unittest/test_compr_zstd.c index 67423a445..9d81f9593 100644 --- a/unittest/test_compr_zstd.c +++ b/unittest/test_compr_zstd.c @@ -18,13 +18,18 @@ #include "framework.h" #include "util.h" -TEST_SUITE(compr_zstd) +TEST_SUITE(compr_type_zstd) -TEST(zstd_small_roundtrip) +TEST(small_roundtrip) { + const uint64_t expected_foobar_checksum = 0xa2aa05ed9085aaf9ULL; + + XXH64_state_t *checksum = XXH64_createState(); + XXH64_reset(checksum, 0); + FILE *f = fopen("data.zstd", "w"); struct compressor *compr_zstd = compressor_from_type(COMPR_TYPE_ZSTD); - struct compr_state *c_state = compr_zstd->init(f, -1); + struct compr_state *c_state = compr_zstd->init(f, -1, checksum); CHECK(c_state); CHECK(compr_zstd->write(c_state, "foobar", 6)); @@ -32,9 +37,12 @@ TEST(zstd_small_roundtrip) CHECK(compr_zstd->free(c_state)); fclose(f); + CHECK_INT_EQ(XXH64_digest(checksum), expected_foobar_checksum); + + XXH64_reset(checksum, 0); f = fopen("data.zstd", "r"); struct decompressor *decompr_zstd = decompressor_from_type(COMPR_TYPE_ZSTD); - struct decompr_state *d_state = decompr_zstd->init(f); + struct decompr_state *d_state = decompr_zstd->init(f, checksum); CHECK(d_state); char buffer[4]; @@ -49,15 +57,19 @@ TEST(zstd_small_roundtrip) // Error state is remembered. CHECK(!decompr_zstd->free(d_state)); fclose(f); + + CHECK_INT_EQ(XXH64_digest(checksum), expected_foobar_checksum); + + XXH64_freeState(checksum); } -TEST(zstd_large_compressible_roundtrip) +TEST(large_compressible_roundtrip) { char data[] = "The quick brown fox jumps over the lazy dog"; FILE *f = fopen("data.zstd", "w"); struct compressor *compr_zstd = compressor_from_type(COMPR_TYPE_ZSTD); - struct compr_state *c_state = compr_zstd->init(f, 1); + struct compr_state *c_state = compr_zstd->init(f, 1, NULL); CHECK(c_state); for (size_t i = 0; i < 1000; i++) { @@ -69,7 +81,7 @@ TEST(zstd_large_compressible_roundtrip) f = fopen("data.zstd", "r"); struct decompressor *decompr_zstd = decompressor_from_type(COMPR_TYPE_ZSTD); - struct decompr_state *d_state = decompr_zstd->init(f); + struct decompr_state *d_state = decompr_zstd->init(f, NULL); CHECK(d_state); char buffer[sizeof(data)]; @@ -86,7 +98,7 @@ TEST(zstd_large_compressible_roundtrip) fclose(f); } -TEST(zstd_large_uncompressible_roundtrip) +TEST(large_uncompressible_roundtrip) { char data[100000]; for (size_t i = 0; i < sizeof(data); i++) { @@ -95,7 +107,7 @@ TEST(zstd_large_uncompressible_roundtrip) FILE *f = fopen("data.zstd", "w"); struct compressor *compr_zstd = compressor_from_type(COMPR_TYPE_ZSTD); - struct compr_state *c_state = compr_zstd->init(f, 1); + struct compr_state *c_state = compr_zstd->init(f, 1, NULL); CHECK(c_state); CHECK(compr_zstd->write(c_state, data, sizeof(data))); @@ -105,7 +117,7 @@ TEST(zstd_large_uncompressible_roundtrip) f = fopen("data.zstd", "r"); struct decompressor *decompr_zstd = decompressor_from_type(COMPR_TYPE_ZSTD); - struct decompr_state *d_state = decompr_zstd->init(f); + struct decompr_state *d_state = decompr_zstd->init(f, NULL); CHECK(d_state); char buffer[sizeof(data)];