From: Erik Johansson Date: Sun, 17 Nov 2019 20:31:49 +0000 (+0100) Subject: Add handling of __TIMESTAMP__ macro (#488) X-Git-Tag: v4.0~705 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ffa82dd89a145b237aa8ca1ee2aab8687387eec0;p=thirdparty%2Fccache.git Add handling of __TIMESTAMP__ macro (#488) __TIMESTAMP__ is expanded by the pre-processor to asctime() of the source file's modification time. Handle __TIMESTAMP__ similar to __DATE__ and include asctime(modification time) in the hash when __TIMESTAMP__ is detected in the source code. --- diff --git a/configure.ac b/configure.ac index bf5d1b945..8a00df486 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ AC_CHECK_HEADERS(sys/ioctl.h) AC_CHECK_HEADERS(linux/fs.h) AC_CHECK_HEADERS(sys/clonefile.h) +AC_CHECK_FUNCS(asctime_r) AC_CHECK_FUNCS(gethostname) AC_CHECK_FUNCS(getopt_long) AC_CHECK_FUNCS(getpwuid) diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 9c3ac4577..e4af9982d 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -676,7 +676,8 @@ still has to do _some_ preprocessing (like macros). With this option set, ccache will only include system headers in the hash but not add the system header files to the list of include files. *time_macros*:: - Ignore `__DATE__` and `__TIME__` being present in the source code. + Ignore `__DATE__`, `__TIME__` and `__TIMESTAMP__` being present in the + source code. -- + See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more @@ -1172,10 +1173,10 @@ ccache has support for GCC's precompiled headers. However, you have to do some things to make it work properly: * You must set <> to *pch_defines,time_macros*. - The reason is that ccache can't tell whether `__TIME__` or `__DATE__` is used - when using a precompiled header. Further, it can't detect changes in - **#define**s in the source code because of how preprocessing works in - combination with precompiled headers. 
+ The reason is that ccache can't tell whether `__TIME__`, `__DATE__` or + `__TIMESTAMP__` is used when using a precompiled header. Further, it can't + detect changes in **#define**s in the source code because of how + preprocessing works in combination with precompiled headers. * You must either: + -- @@ -1379,6 +1380,15 @@ problems and what may be done to increase the hit rate: output. If you know that `__DATE__` isn't used in practice, or don't care if ccache produces objects where `__DATE__` is expanded to something in the past, you can set <> to *time_macros*. +** The `__TIMESTAMP__` preprocessor macro is (potentially) being used and the + source file's modification time has changed. This is similar to how + `__TIME__` is handled. If `__TIMESTAMP__` is present in the source code, + ccache hashes the string representation of the source file's modification + time in order to be able to produce the correct object file if the + `__TIMESTAMP__` macro affects the output. If you know that `__TIMESTAMP__` + isn't used in practice, or don't care if ccache produces objects where + `__TIMESTAMP__` is expanded to something in the past, you can set + <> to *time_macros*. ** The input file path has changed. ccache includes the input file path in the direct mode hash to be able to take relative include files into account and to produce a correct object file if the source code includes a `__FILE__` @@ -1387,9 +1397,9 @@ problems and what may be done to increase the hit rate: compiled and cached before, ccache has either detected that something has changed anyway or a cleanup has been performed (either explicitly or implicitly when a cache limit has been reached). Some perhaps unobvious - things that may result in a cache miss are usage of `__TIME__` or - `__DATE__` macros, or use of automatically generated code that contains a - timestamp, build counter or other volatile information. 
+ things that may result in a cache miss are usage of `__TIME__`, `__DATE__` or + `__TIMESTAMP__` macros, or use of automatically generated code that contains + a timestamp, build counter or other volatile information. * If ``multiple source files'' has been incremented, it's an indication that the compiler has been invoked on several source code files at once. ccache doesn't support that. Compile the source code files separately if possible. diff --git a/src/hashutil.cpp b/src/hashutil.cpp index 591965d02..a091225d7 100644 --- a/src/hashutil.cpp +++ b/src/hashutil.cpp @@ -18,6 +18,7 @@ #include "hashutil.hpp" +#include "Stat.hpp" #include "ccache.hpp" #include "macroskip.hpp" @@ -48,6 +49,42 @@ hash_from_int(int i) return XXH64(&i, sizeof(int), 0); } +// Returns one of HASH_SOURCE_CODE_FOUND_DATE, HASH_SOURCE_CODE_FOUND_TIME or +// HASH_SOURCE_CODE_FOUND_TIMESTAMP if "_DATE__", "_TIME__" or "_TIMESTAMP__" +// starts at str[pos]. +// +// Pre-condition: str[pos - 1] == '_' +static int +check_for_temporal_macros_helper(const char* str, size_t len, size_t pos) +{ + if (pos + 7 > len) { + return 0; + } + + int found = 0; + int macro_len = 7; + if (memcmp(str + pos, "_DATE__", 7) == 0) { + found = HASH_SOURCE_CODE_FOUND_DATE; + } else if (memcmp(str + pos, "_TIME__", 7) == 0) { + found = HASH_SOURCE_CODE_FOUND_TIME; + } else if (pos + 12 <= len && memcmp(str + pos, "_TIMESTAMP__", 12) == 0) { + found = HASH_SOURCE_CODE_FOUND_TIMESTAMP; + macro_len = 12; + } else { + return 0; + } + + // Check char before and after macro to verify that the found macro isn't + // part of another identifier. 
+ if ((pos == 1 || (str[pos - 2] != '_' && !isalnum(str[pos - 2]))) + && (pos + macro_len == len + || (str[pos + macro_len] != '_' && !isalnum(str[pos + macro_len])))) { + return found; + } + + return 0; +} + static int check_for_temporal_macros_bmh(const char* str, size_t len) { @@ -59,20 +96,11 @@ check_for_temporal_macros_bmh(const char* str, size_t len) size_t i = 7; while (i < len) { - // Check whether the substring ending at str[i] has the form "__...E__". On + // Check whether the substring ending at str[i] has the form "_....E..". On // the assumption that 'E' is less common in source than '_', we check // str[i-2] first. - if (str[i - 2] == 'E' && str[i - 0] == '_' && str[i - 7] == '_' - && str[i - 1] == '_' && str[i - 6] == '_' - && (i < 8 || (str[i - 8] != '_' && !isalnum(str[i - 8]))) - && (i + 1 >= len || (str[i + 1] != '_' && !isalnum(str[i + 1])))) { - // Check the remaining characters to see if the substring is "__DATE__" - // or "__TIME__". - if (str[i - 5] == 'D' && str[i - 4] == 'A' && str[i - 3] == 'T') { - result |= HASH_SOURCE_CODE_FOUND_DATE; - } else if (str[i - 5] == 'T' && str[i - 4] == 'I' && str[i - 3] == 'M') { - result |= HASH_SOURCE_CODE_FOUND_TIME; - } + if (str[i - 2] == 'E' && str[i - 7] == '_') { + result |= check_for_temporal_macros_helper(str, len, i - 6); } // macro_skip tells us how far we can skip forward upon seeing str[i] at @@ -87,8 +115,8 @@ check_for_temporal_macros_bmh(const char* str, size_t len) static int check_for_temporal_macros_avx2(const char* str, size_t len) __attribute__((target("avx2"))); -// The following algorithm, which uses AVX2 instructions to find __DATE__ and -// __TIME__, is heavily inspired by +// The following algorithm, which uses AVX2 instructions to find __DATE__, +// __TIME__ and __TIMESTAMP__, is heavily inspired by // . 
static int check_for_temporal_macros_avx2(const char* str, size_t len) @@ -102,7 +130,8 @@ check_for_temporal_macros_avx2(const char* str, size_t len) size_t pos = 0; for (; pos + 5 + 32 <= len; pos += 32) { // Load 32 bytes from the current position in the input string, with - // block_last being offset 5 bytes (i.e. the offset of 'E' in both macros). + // block_last being offset 5 bytes (i.e. the offset of 'E' in all three + // macros). const __m256i block_first = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(str + pos)); const __m256i block_last = @@ -125,26 +154,7 @@ check_for_temporal_macros_avx2(const char* str, size_t len) // Clear the least significant bit set. mask = mask & (mask - 1); - if (start + 7 > len) { - break; - } - - int found = 0; - if (memcmp(str + start, "_DATE__", 7) == 0) { - found = HASH_SOURCE_CODE_FOUND_DATE; - } else if (memcmp(str + start, "_TIME__", 7) == 0) { - found = HASH_SOURCE_CODE_FOUND_TIME; - } else { - continue; - } - - // Check char before and after macro to verify that the found macro isn't - // part of another identifier. - if ((start == 1 || (str[start - 2] != '_' && !isalnum(str[start - 2]))) - && (start + 7 == len - || (str[start + 7] != '_' && !isalnum(str[start + 7])))) { - result |= found; - } + result |= check_for_temporal_macros_helper(str, len, start); } } @@ -154,10 +164,11 @@ check_for_temporal_macros_avx2(const char* str, size_t len) } #endif -// Search for the strings "__DATE__" and "__TIME__" in str. +// Search for the strings "__DATE__", "__TIME__" and "__TIMESTAMP__" in str. // -// Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and -// HASH_SOURCE_CODE_FOUND_TIME set appropriately. +// Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE, +// HASH_SOURCE_CODE_FOUND_TIME and HASH_SOURCE_CODE_FOUND_TIMESTAMP set +// appropriately. 
int check_for_temporal_macros(const char* str, size_t len) { @@ -179,8 +190,8 @@ hash_source_code_string(const Config& config, { int result = HASH_SOURCE_CODE_OK; - // Check for __DATE__ and __TIME__ if the sloppiness configuration tells us - // we should. + // Check for __DATE__, __TIME__ and __TIMESTAMP__ if the sloppiness + // configuration tells us we should. if (!(config.sloppiness() & SLOPPY_TIME_MACROS)) { result |= check_for_temporal_macros(str, len); } @@ -212,6 +223,34 @@ hash_source_code_string(const Config& config, // disabled. cc_log("Found __TIME__ in %s", path); } + if (result & HASH_SOURCE_CODE_FOUND_TIMESTAMP) { + cc_log("Found __TIMESTAMP__ in %s", path); + + // Make sure that the hash sum changes if the (potential) expansion of + // __TIMESTAMP__ changes. + const auto stat = Stat::stat(path); + if (!stat) { + return HASH_SOURCE_CODE_ERROR; + } + + time_t t = stat.mtime(); + tm modified; + hash_delimiter(hash, "timestamp"); + if (!localtime_r(&t, &modified)) { + return HASH_SOURCE_CODE_ERROR; + } + +#ifdef HAVE_ASCTIME_R + char buffer[26]; + auto timestamp = asctime_r(&modified, buffer); +#else + auto timestamp = asctime(&modified); +#endif + if (!timestamp) { + return HASH_SOURCE_CODE_ERROR; + } + hash_string(hash, timestamp); + } return result; } diff --git a/src/hashutil.hpp b/src/hashutil.hpp index 87a49b81d..ef3bb03ff 100644 --- a/src/hashutil.hpp +++ b/src/hashutil.hpp @@ -31,6 +31,7 @@ unsigned hash_from_int(int i); #define HASH_SOURCE_CODE_ERROR 1 #define HASH_SOURCE_CODE_FOUND_DATE 2 #define HASH_SOURCE_CODE_FOUND_TIME 4 +#define HASH_SOURCE_CODE_FOUND_TIMESTAMP 8 int check_for_temporal_macros(const char* str, size_t len); int hash_source_code_string(const Config& config, diff --git a/src/macroskip.hpp b/src/macroskip.hpp index 6c5926a1d..de755b33c 100644 --- a/src/macroskip.hpp +++ b/src/macroskip.hpp @@ -23,9 +23,9 @@ #include // A Boyer-Moore-Horspool skip table used for searching for the strings -// "__TIME__" and "__DATE__". 
+// "__TIME__", "__DATE__" and "__TIMESTAMP__". // -// macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__". +// macro_skip[c] = 8 for all c not in "__TIME__", "__DATE__" and "__TIMEST". // // The other characters map as follows: // @@ -36,6 +36,7 @@ // I -> 4 // M -> 3 // T -> 3 +// S -> 1 // // // This was generated with the following Python script: @@ -46,6 +47,7 @@ // 'E': 2, // 'I': 4, // 'M': 3, +// 'S': 1, // 'T': 3} // // for i in range(0, 256): @@ -62,7 +64,7 @@ static const uint32_t macro_skip[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 5, 2, 8, 8, 8, 4, 8, 8, 8, 3, - 8, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 1, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, diff --git a/unittest/test_hashutil.cpp b/unittest/test_hashutil.cpp index fae57f53c..2cb6c0598 100644 --- a/unittest/test_hashutil.cpp +++ b/unittest/test_hashutil.cpp @@ -178,6 +178,14 @@ TEST(check_for_temporal_macros) "int ab;\n"; const char date_end[] = "#define ab __DATE__"; + const char timestamp_start[] = + "__TIMESTAMP__\n" + "int c;\n"; + const char timestamp_middle[] = + "#define c __TIMESTAMP__\n" + "int c;\n"; + const char timestamp_end[] = "#define c __TIMESTAMP__"; + const char no_temporal[] = "#define ab a__DATE__\n" "#define ab __DATE__a\n" @@ -237,6 +245,35 @@ TEST(check_for_temporal_macros) CHECK(check_for_temporal_macros(date_end + sizeof(date_end) - 9, 9)); CHECK(!check_for_temporal_macros(date_end + sizeof(date_end) - 8, 8)); + CHECK(check_for_temporal_macros(timestamp_start + 0, + sizeof(timestamp_start) - 0)); + 
CHECK(!check_for_temporal_macros(timestamp_start + 1, + sizeof(timestamp_start) - 1)); + + CHECK(check_for_temporal_macros(timestamp_middle + 0, + sizeof(timestamp_middle) - 0)); + CHECK(check_for_temporal_macros(timestamp_middle + 1, + sizeof(timestamp_middle) - 1)); + CHECK(check_for_temporal_macros(timestamp_middle + 2, + sizeof(timestamp_middle) - 2)); + CHECK(check_for_temporal_macros(timestamp_middle + 3, + sizeof(timestamp_middle) - 3)); + CHECK(check_for_temporal_macros(timestamp_middle + 4, + sizeof(timestamp_middle) - 4)); + CHECK(check_for_temporal_macros(timestamp_middle + 5, + sizeof(timestamp_middle) - 5)); + CHECK(check_for_temporal_macros(timestamp_middle + 6, + sizeof(timestamp_middle) - 6)); + CHECK(check_for_temporal_macros(timestamp_middle + 7, + sizeof(timestamp_middle) - 7)); + + CHECK( + check_for_temporal_macros(timestamp_end + 0, sizeof(timestamp_end) - 0)); + CHECK( + check_for_temporal_macros(timestamp_end + sizeof(timestamp_end) - 14, 14)); + CHECK( + !check_for_temporal_macros(timestamp_end + sizeof(timestamp_end) - 13, 13)); + CHECK(!check_for_temporal_macros(no_temporal + 0, sizeof(no_temporal) - 0)); CHECK(!check_for_temporal_macros(no_temporal + 1, sizeof(no_temporal) - 1)); CHECK(!check_for_temporal_macros(no_temporal + 2, sizeof(no_temporal) - 2));