From: Joel Rosdahl Date: Wed, 3 Aug 2022 08:04:03 +0000 (+0200) Subject: fix: Hash time information on inode cache hit X-Git-Tag: v4.7~126 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=561be2085df94b3c35dd803e56668e6feafc93b7;p=thirdparty%2Fccache.git fix: Hash time information on inode cache hit As mentioned in discussion #1086: If the inode cache is enabled, hash_source_code_file will on an inode cache hit fail to hash time information if there are temporal macros in the code. This is because hash_source_code_string (called from hash_source_code_file via hash_source_code_file_nocache) on an inode cache miss adds time information to the hash in case one of those macros were found. However, on an inode cache hit the return value of hash_source_code_file will be correctly fetched from the cache, but the hash sum will only be updated with (the hash of) the include file and not the time information. The fix is to let the inode cache only cache the effects of hashing the file and checking for macros, not the hashing of time information since that's volatile. After the fix: - The new do_hash_file function performs file hashing and macro checking. The inode cache caches this step. - hash_source_code_file returns a Digest instead of adding data to a Hash. - hash_source_code_file calls do_hash_file and then potentially hashes time information. If there are no temporal macros the returned digest will be identical to the file hash, otherwise the returned digest will be of a hash of file content hash + time information. This also improves hashes that are stored in the direct mode manifest: Previously they were always the hash of the file content hash but now they are just the file content hash in the common case when there is no __DATE__ or __TIMESTAMP__ macro in the file. --- diff --git a/src/InodeCache.cpp b/src/InodeCache.cpp index 9b0562552..e0055d264 100644 --- a/src/InodeCache.cpp +++ b/src/InodeCache.cpp @@ -70,16 +70,10 @@ static_assert(std::is_trivially_copyable::value, "Digest is expected to be trivially copyable."); static_assert( - static_cast(InodeCache::ContentType::binary) == 0, + static_cast(InodeCache::ContentType::raw) == 0, "Numeric value is part of key, increment version number if changed."); static_assert( - static_cast(InodeCache::ContentType::code) == 1, - "Numeric value is part of key, increment version number if changed."); -static_assert( - static_cast(InodeCache::ContentType::code_with_sloppy_time_macros) == 2, - "Numeric value is part of key, increment version number if changed."); -static_assert( - static_cast(InodeCache::ContentType::precompiled_header) == 3, + static_cast(InodeCache::ContentType::checked_for_temporal_macros) == 1, "Numeric value is part of key, increment version number if changed."); const void* MMAP_FAILED = reinterpret_cast(-1); // NOLINT: Must cast here diff --git a/src/InodeCache.hpp b/src/InodeCache.hpp index a4a272972..0d76e5728 100644 --- a/src/InodeCache.hpp +++ b/src/InodeCache.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020-2021 Joel Rosdahl and other contributors +// Copyright (C) 2020-2022 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -29,21 +29,23 @@ class Digest; class InodeCache { public: - // Specifies in which role a file was hashed, since the hash result does not - // only depend on the actual content but also what we used it for. Source code - // files are scanned for macros while binary files are not as one example. + // Specifies in which mode a file was hashed since the hash result does not + // only depend on the actual content but also on operations that were + // performed that affect the return value. For example, source code files are + // normally scanned for macros while binary files are not. enum class ContentType { - binary = 0, - code = 1, - code_with_sloppy_time_macros = 2, - precompiled_header = 3, + // The file was not scanned for temporal macros. + raw = 0, + // The file was checked for temporal macros (see check_for_temporal_macros + // in hashutil). + checked_for_temporal_macros = 1, }; InodeCache(const Config& config); ~InodeCache(); // Get saved hash digest and return value from a previous call to - // hash_source_code_file(). + // do_hash_file() in hashutil.cpp. // // Returns true if saved values could be retrieved from the cache, false // otherwise. @@ -52,8 +54,8 @@ public: Digest& file_digest, int* return_value = nullptr); - // Put hash digest and return value from a successful call to - // hash_source_code_file(). + // Put hash digest and return value from a successful call to do_hash_file() + // in hashutil.cpp. // // Returns true if values could be stored in the cache, false otherwise. bool put(const std::string& path, diff --git a/src/ccache.cpp b/src/ccache.cpp index ea1c3bac0..3326ada17 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -369,7 +369,7 @@ do_remember_include_file(Context& ctx, } // Let's hash the include file content. - Hash fhash; + Digest file_digest; if (is_pch) { if (ctx.args_info.included_pch_file.empty()) { @@ -387,28 +387,27 @@ do_remember_include_file(Context& ctx, } } - if (!hash_binary_file(ctx, fhash, path)) { + if (!hash_binary_file(ctx, file_digest, path)) { return false; } cpp_hash.hash_delimiter(using_pch_sum ? "pch_sum_hash" : "pch_hash"); - cpp_hash.hash(fhash.digest().to_string()); + cpp_hash.hash(file_digest.to_string()); } if (ctx.config.direct_mode()) { if (!is_pch) { // else: the file has already been hashed. - int result = hash_source_code_file(ctx, fhash, path); + int result = hash_source_code_file(ctx, file_digest, path); if (result & HASH_SOURCE_CODE_ERROR || result & HASH_SOURCE_CODE_FOUND_TIME) { return false; } } - Digest d = fhash.digest(); - ctx.included_files.emplace(path, d); + ctx.included_files.emplace(path, file_digest); if (depend_mode_hash) { depend_mode_hash->hash_delimiter("include"); - depend_mode_hash->hash(d.to_string()); + depend_mode_hash->hash(file_digest.to_string()); } } @@ -1793,8 +1792,10 @@ calculate_result_and_manifest_key(Context& ctx, hash.hash_delimiter("inputfile"); hash.hash(ctx.args_info.input_file); - hash.hash_delimiter("sourcecode"); - int result = hash_source_code_file(ctx, hash, ctx.args_info.input_file); + hash.hash_delimiter("sourcecode hash"); + Digest input_file_digest; + int result = + hash_source_code_file(ctx, input_file_digest, ctx.args_info.input_file); if (result & HASH_SOURCE_CODE_ERROR) { return nonstd::make_unexpected(Statistic::internal_error); } @@ -1803,6 +1804,7 @@ calculate_result_and_manifest_key(Context& ctx, ctx.config.set_direct_mode(false); return std::make_pair(std::nullopt, std::nullopt); } + hash.hash(input_file_digest.to_string()); manifest_key = hash.digest(); diff --git a/src/core/Manifest.cpp b/src/core/Manifest.cpp index 01adf50fc..f57c90e69 100644 --- a/src/core/Manifest.cpp +++ b/src/core/Manifest.cpp @@ -388,8 +388,8 @@ Manifest::result_matches( auto hashed_files_iter = hashed_files.find(path); if (hashed_files_iter == hashed_files.end()) { - Hash hash; - int ret = hash_source_code_file(ctx, hash, path, fs.size); + Digest actual_digest; + int ret = hash_source_code_file(ctx, actual_digest, path, fs.size); if (ret & HASH_SOURCE_CODE_ERROR) { LOG("Failed hashing {}", path); return false; @@ -398,8 +398,7 @@ Manifest::result_matches( return false; } - Digest actual = hash.digest(); - hashed_files_iter = hashed_files.emplace(path, actual).first; + hashed_files_iter = hashed_files.emplace(path, actual_digest).first; } if (fi.digest != hashed_files_iter->second) { diff --git a/src/hashutil.cpp b/src/hashutil.cpp index 4598ec8d7..865698d13 100644 --- a/src/hashutil.cpp +++ b/src/hashutil.cpp @@ -179,44 +179,49 @@ check_for_temporal_macros_avx2(std::string_view str) #endif int -hash_source_code_file_nocache(const Context& ctx, - Hash& hash, - const std::string& path, - size_t size_hint, - bool is_precompiled) +do_hash_file(const Context& ctx, + Digest& digest, + const std::string& path, + size_t size_hint, + bool check_temporal_macros) { - if (is_precompiled) { - if (hash.hash_file(path)) { - return HASH_SOURCE_CODE_OK; - } else { - return HASH_SOURCE_CODE_ERROR; - } - } else { - std::string data; - try { - data = Util::read_file(path, size_hint); - } catch (core::Error&) { - return HASH_SOURCE_CODE_ERROR; +#ifdef INODE_CACHE_SUPPORTED + const InodeCache::ContentType content_type = + check_temporal_macros ? InodeCache::ContentType::checked_for_temporal_macros + : InodeCache::ContentType::raw; + if (ctx.config.inode_cache()) { + int result; + if (ctx.inode_cache.get(path, content_type, digest, &result)) { + return result; } - int result = hash_source_code_string(ctx, hash, data, path); - return result; } -} +#else + (void)ctx; +#endif -#ifdef INODE_CACHE_SUPPORTED -InodeCache::ContentType -get_content_type(const Config& config, const std::string& path) -{ - if (Util::is_precompiled_header(path)) { - return InodeCache::ContentType::precompiled_header; + std::string data; + try { + data = Util::read_file(path, size_hint); + } catch (core::Error&) { + return HASH_SOURCE_CODE_ERROR; } - if (config.sloppiness().is_enabled(core::Sloppy::time_macros)) { - return InodeCache::ContentType::code_with_sloppy_time_macros; + + int result = HASH_SOURCE_CODE_OK; + if (check_temporal_macros) { + result |= check_for_temporal_macros(data); } - return InodeCache::ContentType::code; -} + + Hash hash; + hash.hash(data); + digest = hash.digest(); + +#ifdef INODE_CACHE_SUPPORTED + ctx.inode_cache.put(path, content_type, digest, result); #endif + return result; +} + } // namespace int @@ -231,27 +236,42 @@ check_for_temporal_macros(std::string_view str) } int -hash_source_code_string(const Context& ctx, - Hash& hash, - std::string_view str, - const std::string& path) +hash_source_code_file(const Context& ctx, + Digest& digest, + const std::string& path, + size_t size_hint) { - int result = HASH_SOURCE_CODE_OK; + const bool check_temporal_macros = + !ctx.config.sloppiness().is_enabled(core::Sloppy::time_macros); + int result = + do_hash_file(ctx, digest, path, size_hint, check_temporal_macros); - // Check for __DATE__, __TIME__ and __TIMESTAMP__if the sloppiness - // configuration tells us we should. - if (!(ctx.config.sloppiness().is_enabled(core::Sloppy::time_macros))) { - result |= check_for_temporal_macros(str); + if (!check_temporal_macros || result == HASH_SOURCE_CODE_OK + || (result & HASH_SOURCE_CODE_ERROR)) { + return result; } - // Hash the source string. - hash.hash(str); + if (result & HASH_SOURCE_CODE_FOUND_TIME) { + // We don't know for sure that the program actually uses the __TIME__ macro, + // but we have to assume it anyway and hash the time stamp. However, that's + // not very useful since the chance that we get a cache hit later the same + // second should be quite slim... So, just signal back to the caller that + // __TIME__ has been found so that the direct mode can be disabled. + LOG("Found __TIME__ in {}", path); + return result; + } + + // __DATE__ or __TIMESTAMP__ found. We now make sure that the digest changes + // if the (potential) expansion of those macros changes by computing a new + // digest comprising the file digest and time information that represents the + // macro expansions. + + Hash hash; + hash.hash(digest.to_string()); if (result & HASH_SOURCE_CODE_FOUND_DATE) { LOG("Found __DATE__ in {}", path); - // Make sure that the hash sum changes if the (potential) expansion of - // __DATE__ changes. hash.hash_delimiter("date"); auto now = Util::localtime(); if (!now) { @@ -270,20 +290,10 @@ hash_source_code_string(const Context& ctx, hash.hash(source_date_epoch); } } - if (result & HASH_SOURCE_CODE_FOUND_TIME) { - // We don't know for sure that the program actually uses the __TIME__ macro, - // but we have to assume it anyway and hash the time stamp. However, that's - // not very useful since the chance that we get a cache hit later the same - // second should be quite slim... So, just signal back to the caller that - // __TIME__ has been found so that the direct mode can be disabled. - LOG("Found __TIME__ in {}", path); - } if (result & HASH_SOURCE_CODE_FOUND_TIMESTAMP) { LOG("Found __TIMESTAMP__ in {}", path); - // Make sure that the hash sum changes if the (potential) expansion of - // __TIMESTAMP__ changes. const auto stat = Stat::stat(path); if (!stat) { return HASH_SOURCE_CODE_ERROR; @@ -306,74 +316,29 @@ hash_source_code_string(const Context& ctx, hash.hash(timestamp); } + digest = hash.digest(); return result; } -int -hash_source_code_file(const Context& ctx, - Hash& hash, - const std::string& path, - size_t size_hint) +bool +hash_binary_file(const Context& ctx, + Digest& digest, + const std::string& path, + size_t size_hint) { -#ifdef INODE_CACHE_SUPPORTED - if (!ctx.config.inode_cache()) { -#endif - return hash_source_code_file_nocache( - ctx, hash, path, size_hint, Util::is_precompiled_header(path)); - -#ifdef INODE_CACHE_SUPPORTED - } - - // Reusable file hashes must be independent of the outer context. Thus hash - // files separately so that digests based on file contents can be reused. Then - // add the digest into the outer hash instead. - InodeCache::ContentType content_type = get_content_type(ctx.config, path); - Digest digest; - int return_value; - if (!ctx.inode_cache.get(path, content_type, digest, &return_value)) { - Hash file_hash; - return_value = hash_source_code_file_nocache( - ctx, - file_hash, - path, - size_hint, - content_type == InodeCache::ContentType::precompiled_header); - if (return_value == HASH_SOURCE_CODE_ERROR) { - return HASH_SOURCE_CODE_ERROR; - } - digest = file_hash.digest(); - ctx.inode_cache.put(path, content_type, digest, return_value); - } - hash.hash(digest.bytes(), Digest::size(), Hash::HashType::binary); - return return_value; -#endif + return do_hash_file(ctx, digest, path, size_hint, false) + == HASH_SOURCE_CODE_OK; } bool hash_binary_file(const Context& ctx, Hash& hash, const std::string& path) { - if (!ctx.config.inode_cache()) { - return hash.hash_file(path); - } - -#ifdef INODE_CACHE_SUPPORTED - // Reusable file hashes must be independent of the outer context. Thus hash - // files separately so that digests based on file contents can be reused. Then - // add the digest into the outer hash instead. Digest digest; - if (!ctx.inode_cache.get(path, InodeCache::ContentType::binary, digest)) { - Hash file_hash; - if (!file_hash.hash_file(path)) { - return false; - } - digest = file_hash.digest(); - ctx.inode_cache.put(path, InodeCache::ContentType::binary, digest); + const bool success = hash_binary_file(ctx, digest, path); + if (success) { + hash.hash(digest.to_string()); } - hash.hash(digest.bytes(), Digest::size(), Hash::HashType::binary); - return true; -#else - return hash.hash_file(path); -#endif + return success; } bool diff --git a/src/hashutil.hpp b/src/hashutil.hpp index 01ee8e6ba..d0fd1420a 100644 --- a/src/hashutil.hpp +++ b/src/hashutil.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2009-2021 Joel Rosdahl and other contributors +// Copyright (C) 2009-2022 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -24,6 +24,7 @@ class Config; class Context; +class Digest; class Hash; const int HASH_SOURCE_CODE_OK = 0; @@ -40,20 +41,24 @@ const int HASH_SOURCE_CODE_FOUND_TIMESTAMP = (1 << 3); // appropriately. int check_for_temporal_macros(std::string_view str); -// Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results. -int hash_source_code_string(const Context& ctx, - Hash& hash, - std::string_view str, - const std::string& path); - -// Hash a file ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_* -// results. +// Hash a source code file using the inode cache if enabled. Returns a bitmask +// of HASH_SOURCE_CODE_* results. int hash_source_code_file(const Context& ctx, - Hash& hash, + Digest& digest, const std::string& path, size_t size_hint = 0); -// Hash a binary file using the inode cache if enabled. +// Hash a binary file (using the inode cache if enabled) and put its digest in +// `digest` +// +// Returns true on success, otherwise false. +bool hash_binary_file(const Context& ctx, + Digest& digest, + const std::string& path, + size_t size_hint = 0); + +// Hash a binary file (using the inode cache if enabled) and hash the digest to +// `hash`. // // Returns true on success, otherwise false. bool hash_binary_file(const Context& ctx, Hash& hash, const std::string& path); diff --git a/unittest/test_InodeCache.cpp b/unittest/test_InodeCache.cpp index 1c8a15d6a..a7fe02fb4 100644 --- a/unittest/test_InodeCache.cpp +++ b/unittest/test_InodeCache.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Joel Rosdahl and other contributors +// Copyright (C) 2020-2022 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -43,10 +43,11 @@ put(const Context& ctx, const std::string& str, int return_value) { - return ctx.inode_cache.put(filename, - InodeCache::ContentType::code, - Hash().hash(str).digest(), - return_value); + return ctx.inode_cache.put( + filename, + InodeCache::ContentType::checked_for_temporal_macros, + Hash().hash(str).digest(), + return_value); } } // namespace @@ -64,10 +65,16 @@ TEST_CASE("Test disabled") Digest digest; int return_value; - CHECK(!ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); - CHECK(!ctx.inode_cache.put( - "a", InodeCache::ContentType::code, digest, return_value)); + CHECK( + !ctx.inode_cache.get("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + &return_value)); + CHECK( + !ctx.inode_cache.put("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + return_value)); CHECK(ctx.inode_cache.get_hits() == -1); CHECK(ctx.inode_cache.get_misses() == -1); CHECK(ctx.inode_cache.get_errors() == -1); @@ -85,8 +92,11 @@ TEST_CASE("Test lookup nonexistent") Digest digest; int return_value; - CHECK(!ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); + CHECK( + !ctx.inode_cache.get("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + &return_value)); CHECK(ctx.inode_cache.get_hits() == 0); CHECK(ctx.inode_cache.get_misses() == 1); CHECK(ctx.inode_cache.get_errors() == 0); @@ -106,8 +116,11 @@ TEST_CASE("Test put and lookup") Digest digest; int return_value; - CHECK(ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); + CHECK( + ctx.inode_cache.get("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + &return_value)); CHECK(digest == Hash().hash("a text").digest()); CHECK(return_value == 1); CHECK(ctx.inode_cache.get_hits() == 1); @@ -116,16 +129,22 @@ TEST_CASE("Test put and lookup") Util::write_file("a", "something else"); - CHECK(!ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); + CHECK( + !ctx.inode_cache.get("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + &return_value)); CHECK(ctx.inode_cache.get_hits() == 1); CHECK(ctx.inode_cache.get_misses() == 1); CHECK(ctx.inode_cache.get_errors() == 0); CHECK(put(ctx, "a", "something else", 2)); - CHECK(ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); + CHECK( + ctx.inode_cache.get("a", + InodeCache::ContentType::checked_for_temporal_macros, + digest, + &return_value)); CHECK(digest == Hash().hash("something else").digest()); CHECK(return_value == 2); CHECK(ctx.inode_cache.get_hits() == 2); @@ -142,7 +161,7 @@ TEST_CASE("Drop file") Digest digest; - ctx.inode_cache.get("a", InodeCache::ContentType::binary, digest); + ctx.inode_cache.get("a", InodeCache::ContentType::raw, digest); CHECK(Stat::stat(ctx.inode_cache.get_file())); CHECK(ctx.inode_cache.drop()); CHECK(!Stat::stat(ctx.inode_cache.get_file())); @@ -159,39 +178,27 @@ TEST_CASE("Test content type") Util::write_file("a", "a text"); Digest binary_digest = Hash().hash("binary").digest(); Digest code_digest = Hash().hash("code").digest(); - Digest code_with_sloppy_time_macros_digest = - Hash().hash("sloppy_time_macros").digest(); - CHECK(ctx.inode_cache.put( - "a", InodeCache::ContentType::binary, binary_digest, 1)); CHECK( - ctx.inode_cache.put("a", InodeCache::ContentType::code, code_digest, 2)); - CHECK( - ctx.inode_cache.put("a", - InodeCache::ContentType::code_with_sloppy_time_macros, - code_with_sloppy_time_macros_digest, - 3)); + ctx.inode_cache.put("a", InodeCache::ContentType::raw, binary_digest, 1)); + CHECK(ctx.inode_cache.put( + "a", InodeCache::ContentType::checked_for_temporal_macros, code_digest, 2)); Digest digest; int return_value; CHECK(ctx.inode_cache.get( - "a", InodeCache::ContentType::binary, digest, &return_value)); + "a", InodeCache::ContentType::raw, digest, &return_value)); CHECK(digest == binary_digest); CHECK(return_value == 1); - CHECK(ctx.inode_cache.get( - "a", InodeCache::ContentType::code, digest, &return_value)); - CHECK(digest == code_digest); - CHECK(return_value == 2); - CHECK( ctx.inode_cache.get("a", - InodeCache::ContentType::code_with_sloppy_time_macros, + InodeCache::ContentType::checked_for_temporal_macros, digest, &return_value)); - CHECK(digest == code_with_sloppy_time_macros_digest); - CHECK(return_value == 3); + CHECK(digest == code_digest); + CHECK(return_value == 2); } TEST_SUITE_END();