From: Joel Rosdahl Date: Tue, 27 Apr 2010 20:43:51 +0000 (+0200) Subject: Handle __{DATE,FILE,TIME}__ macros in a better way X-Git-Tag: v3.0pre1~47 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3f26134a072a75437cd0847f161b48e80b6f4ddb;p=thirdparty%2Fccache.git Handle __{DATE,FILE,TIME}__ macros in a better way --- diff --git a/ccache.c b/ccache.c index caa741e89..ea25a2778 100644 --- a/ccache.c +++ b/ccache.c @@ -203,11 +203,6 @@ enum fromcache_call_mode { FROMCACHE_COMPILED_MODE }; -enum findhash_call_mode { - FINDHASH_DIRECT_MODE, - FINDHASH_CPP_MODE -}; - /* * This is a string that identifies the current "version" of the hash sum * computed by ccache. If, for any reason, we want to force the hash sum to be @@ -290,9 +285,7 @@ static char *get_path_in_cache(const char *name, const char *suffix) /* * This function hashes an include file and stores the path and hash in the - * global included_files variable. It also checks if the include file contains - * __DATE__/__FILE__/__TIME__ macros, in which case the file is not stored and - * direct mode is disabled. Takes over ownership of path. + * global included_files variable. Takes over ownership of path. */ static void remember_include_file(char *path, size_t path_len) { @@ -301,7 +294,7 @@ static void remember_include_file(char *path, size_t path_len) struct stat st; int fd = -1; char *data = (char *)-1; - enum hash_source_code_result result; + int result; if (!included_files) { goto ignore; @@ -348,14 +341,9 @@ static void remember_include_file(char *path, size_t path_len) } hash_start(&fhash); - result = hash_source_code_string(&fhash, data, st.st_size, 1); - switch (result) { - case HASH_SOURCE_CODE_OK: - break; - case HASH_SOURCE_CODE_FOUND_VOLATILE_MACRO: - cc_log("Found __DATE__/__FILE__/__TIME__ macro in %s", path); - /* Fall through. 
*/ - case HASH_SOURCE_CODE_ERROR: + result = hash_source_code_string(&fhash, data, st.st_size, path); + if (result & HASH_SOURCE_CODE_ERROR + || result & HASH_SOURCE_CODE_FOUND_TIME) { goto failure; } @@ -845,7 +833,7 @@ static struct file_hash *calculate_object_hash( int i; char *manifest_name; struct stat st; - enum hash_source_code_result result; + int result; struct file_hash *object_hash = NULL; /* first the arguments */ @@ -905,19 +893,22 @@ static struct file_hash *calculate_object_hash( } if (direct_mode) { - result = hash_source_code_file(hash, input_file, 1); - switch (result) { - case HASH_SOURCE_CODE_OK: - break; - case HASH_SOURCE_CODE_ERROR: + /* + * The source code file or an include file may contain + * __FILE__, so make sure that the hash is unique for the file + * name. + */ + hash_string(hash, input_file); + hash_delimiter(hash); + + result = hash_source_code_file(hash, input_file); + if (result & HASH_SOURCE_CODE_ERROR) { failed(); - break; - case HASH_SOURCE_CODE_FOUND_VOLATILE_MACRO: - cc_log("Found __DATE__/__FILE__/__TIME__ macro in %s", - input_file); + } + if (result & HASH_SOURCE_CODE_FOUND_TIME) { cc_log("Disabling direct mode"); enable_direct = 0; - break; + return NULL; } manifest_name = hash_result(hash); manifest_path = get_path_in_cache(manifest_name, ".manifest"); @@ -1575,10 +1566,10 @@ static void ccache(int argc, char *argv[]) struct mdfour direct_hash; struct mdfour cpp_hash; - /* Argument list to be sent to the preprocessor (except -E). */ + /* Arguments (except -E) to send to the preprocessor. */ ARGS *preprocessor_args; - /* Argument list to be sent to the real compiler. */ + /* Arguments to send to the real compiler. 
*/ ARGS *compiler_args; t = time(NULL); diff --git a/ccache.txt b/ccache.txt index f6cd53158..4dce9ca6a 100644 --- a/ccache.txt +++ b/ccache.txt @@ -397,8 +397,8 @@ The direct mode will be disabled if any of the following holds: * the unifier is enabled (the environment variable *CCACHE_UNIFY* is set) * a *-Wp,_X_* compiler option other than *-Wp,-MD,_path_* and *-Wp,-MMD,_path_* is used -* a *\_\_DATE\_\_*, *\_\_FILE\_\_* or *\_\_TIME__* macro is used in the source - code +* the string ``\_\_TIME__'' is present outside comments and string literals in + the source code THE PREPROCESSOR MODE @@ -429,8 +429,9 @@ contain absolute paths: similar compiler options. * Paths specified by compiler options (such as *-I*, *-MF*, etc) may be absolute. -* The source code file path may be absolute, and that path may be included in - warnings emitted to standard error by the preprocessor. +* The source code file path may be absolute, and that path may be substituted for + *\_\_FILE__* macros in the source code or included in warnings emitted to + standard error by the preprocessor. This means that if you compile the same code in different locations, you can't share compilation results between the different build directories since you get @@ -444,11 +445,15 @@ directory that is a parent of your build directories. (Don't use +/+ as the base directory since that will make ccache also rewrite paths to system header files, which doesn't gain anything.) -The only drawback of using *CCACHE_BASEDIR* is that if you compile with *-g* -and use absolute paths to the source code files, the source code paths that are -stored in the object files may point to the wrong directory, which may prevent -debuggers like GDB from finding the source code. Sometimes, a work-around is to -change the directory explicitly with the ``cd'' command in GDB.
+The drawbacks of using *CCACHE_BASEDIR* are: + +* If you specify an absolute path to the source code file, *\_\_FILE__* macros + will be expanded to a relative path instead. +* If you specify an absolute path to the source code file and compile with + *-g*, the source code path stored in the object file may point to the wrong + directory, which may prevent debuggers like GDB from finding the source code. + Sometimes, a work-around is to change the directory explicitly with the + ``cd'' command in GDB. SHARING A CACHE diff --git a/hashutil.c b/hashutil.c index 6f012962a..b7407ac1b 100644 --- a/hashutil.c +++ b/hashutil.c @@ -23,6 +23,7 @@ #include #include #include +#include unsigned int hash_from_string(void *str) { @@ -51,19 +52,18 @@ int file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2) } while (0) /* - * Hash a string ignoring comments. If check_volatile_macros is true, also - * check for volatile preprocessor macros (__{DATE,FILE,TIME}__) and, if found, - * stop hashing. + * Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_* + * results. */ -enum hash_source_code_result +int hash_source_code_string( - struct mdfour *hash, const char *str, size_t len, - int check_volatile_macros) + struct mdfour *hash, const char *str, size_t len, const char *path) { const char *p; const char *end; char hashbuf[64]; size_t hashbuflen = 0; + int result = HASH_SOURCE_CODE_OK; p = str; end = str + len; @@ -124,15 +124,27 @@ hash_source_code_string( /* Potential start of volatile macro. 
*/ case '_': - if (check_volatile_macros - && p + 7 < end - && p[1] == '_' - && p[6] == '_' - && p[7] == '_' - && (strncmp(p + 2, "DATE", 4) == 0 - || strncmp(p + 2, "FILE", 4) == 0 - || strncmp(p + 2, "TIME", 4) == 0)) { - return HASH_SOURCE_CODE_FOUND_VOLATILE_MACRO; + if (p + 7 < end + && p[1] == '_' && p[5] == 'E' + && p[6] == '_' && p[7] == '_') { + if (p[2] == 'D' && p[3] == 'A' + && p[4] == 'T') { + result |= HASH_SOURCE_CODE_FOUND_DATE; + } else if (p[2] == 'T' && p[3] == 'I' + && p[4] == 'M') { + result |= HASH_SOURCE_CODE_FOUND_TIME; + } + /* + * Of course, we can't be sure that we have + * found a __{DATE,TIME}__ that's actually + * used, but better safe than sorry. And if you + * do something like + * + * #define TIME __TI ## ME__ + * + * in your code, you deserve to get a false + * cache hit. + */ } break; @@ -146,21 +158,47 @@ hash_source_code_string( end: hash_buffer(hash, hashbuf, hashbuflen); - return HASH_SOURCE_CODE_OK; + + if (result & HASH_SOURCE_CODE_FOUND_DATE) { + /* + * Make sure that the hash sum changes if the (potential) + * expansion of __DATE__ changes. + */ + cc_log("Found __DATE__ in %s", path); + time_t t = time(NULL); + struct tm *now = localtime(&t); + hash_delimiter(hash); + hash_buffer(hash, &now->tm_year, sizeof(now->tm_year)); + hash_buffer(hash, &now->tm_mon, sizeof(now->tm_mon)); + hash_buffer(hash, &now->tm_mday, sizeof(now->tm_mday)); + } + if (result & HASH_SOURCE_CODE_FOUND_TIME) { + /* + * We don't know for sure that the program actually uses the + * __TIME__ macro, but we have to assume it anyway and hash the + * time stamp. However, that's not very useful since the chance + * that we get a cache hit later the same second should be + * quite slim... So, just signal back to the caller that + * __TIME__ has been found so that the direct mode can be + * disabled. + */ + cc_log("Found __TIME__ in %s", path); + } + + return result; } /* - * Add contents of a source code file to a hash, but don't hash comments. 
+ * Hash a file ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_* + * results. */ -enum hash_source_code_result -hash_source_code_file( - struct mdfour *hash, const char *path, - int check_volatile_macros) +int +hash_source_code_file(struct mdfour *hash, const char *path) { int fd; struct stat st; char *data; - enum hash_source_code_result result; + int result; fd = open(path, O_RDONLY|O_BINARY); if (fd == -1) { @@ -183,8 +221,7 @@ hash_source_code_file( return HASH_SOURCE_CODE_ERROR; } - result = hash_source_code_string( - hash, data, st.st_size, check_volatile_macros); + result = hash_source_code_string(hash, data, st.st_size, path); munmap(data, st.st_size); return result; } diff --git a/hashutil.h b/hashutil.h index 96f11fc3c..85349c607 100644 --- a/hashutil.h +++ b/hashutil.h @@ -14,19 +14,13 @@ unsigned int hash_from_string(void *str); int strings_equal(void *str1, void *str2); int file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2); -enum hash_source_code_result { - HASH_SOURCE_CODE_OK, - HASH_SOURCE_CODE_ERROR, - HASH_SOURCE_CODE_FOUND_VOLATILE_MACRO -}; +#define HASH_SOURCE_CODE_OK 0 +#define HASH_SOURCE_CODE_ERROR 1 +#define HASH_SOURCE_CODE_FOUND_DATE 2 +#define HASH_SOURCE_CODE_FOUND_TIME 4 -enum hash_source_code_result -hash_source_code_string( - struct mdfour *hash, const char *str, size_t len, - int check_volatile_macros); -enum hash_source_code_result -hash_source_code_file( - struct mdfour *hash, const char *path, - int check_volatile_macros); +int hash_source_code_string( + struct mdfour *hash, const char *str, size_t len, const char *path); +int hash_source_code_file(struct mdfour *hash, const char *path); #endif diff --git a/manifest.c b/manifest.c index c8d1de135..24c74d743 100644 --- a/manifest.c +++ b/manifest.c @@ -363,6 +363,7 @@ static int verify_object(struct manifest *mf, struct object *obj, struct file_info *fi; struct file_hash *actual; struct mdfour hash; + int result; for (i = 0; i < obj->n_file_info_indexes; 
i++) { fi = &mf->file_infos[obj->file_info_indexes[i]]; @@ -370,14 +371,18 @@ static int verify_object(struct manifest *mf, struct object *obj, if (!actual) { actual = x_malloc(sizeof(*actual)); hash_start(&hash); - if (hash_source_code_file(&hash, mf->files[fi->index], - 0) - != HASH_SOURCE_CODE_OK) { + result = hash_source_code_file(&hash, + mf->files[fi->index]); + if (result & HASH_SOURCE_CODE_ERROR) { cc_log("Failed hashing %s", mf->files[fi->index]); free(actual); return 0; } + if (result & HASH_SOURCE_CODE_FOUND_TIME) { + free(actual); + return 0; + } hash_result_as_bytes(&hash, actual->hash); actual->size = hash.totalN; hashtable_insert(hashed_files, diff --git a/test.sh b/test.sh index bac6190cb..b4ac87914 100755 --- a/test.sh +++ b/test.sh @@ -792,38 +792,85 @@ EOF checkstat 'cache miss' 1 ################################################################## - # Check that direct mode is disabled if __DATE__, __FILE__ or __TIME__ - # macros are used. - for x in date file time; do - X=`echo $x | tr 'a-z' 'A-Z'` - testname="__${X}__ in source file" - $CCACHE -Cz >/dev/null - cat <$x.c -char $x[] = __${X}__; + # Check that direct mode correctly detects file name/path changes. 
+ testname="__FILE__ in source file" + $CCACHE -Cz >/dev/null + cat <file.c +#define file __FILE__ +int test; EOF - $CCACHE $COMPILER -c $x.c - checkstat 'cache hit (direct)' 0 - checkstat 'cache hit (preprocessed)' 0 - checkstat 'cache miss' 1 - $CCACHE $COMPILER -c $x.c - checkstat 'cache hit (direct)' 0 - - testname="__${X}__ in include file" - $CCACHE -Cz >/dev/null - cat <$x.h -char $x[] = __${X}__; + $CCACHE $COMPILER -c file.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + $CCACHE $COMPILER -c file.c + checkstat 'cache hit (direct)' 1 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + $CCACHE $COMPILER -c $PWD/file.c + checkstat 'cache hit (direct)' 1 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 2 + + testname="__FILE__ in include file" + $CCACHE -Cz >/dev/null + cat <file.h +#define file __FILE__ +int test; EOF - backdate $x.h - cat <${x}_h.c -#include "$x.h" + backdate file.h + cat <file_h.c +#include "file.h" EOF - $CCACHE $COMPILER -c ${x}_h.c - checkstat 'cache hit (direct)' 0 - checkstat 'cache hit (preprocessed)' 0 - checkstat 'cache miss' 1 - $CCACHE $COMPILER -c ${x}_h.c - checkstat 'cache hit (direct)' 0 - done + $CCACHE $COMPILER -c file_h.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + $CCACHE $COMPILER -c file_h.c + checkstat 'cache hit (direct)' 1 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + mv file_h.c file2_h.c + $CCACHE $COMPILER -c $PWD/file2_h.c + checkstat 'cache hit (direct)' 1 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 2 + + ################################################################## + # Check that we never get direct hits when __TIME__ is used. 
+ testname="__TIME__ in source file" + $CCACHE -Cz >/dev/null + cat <time.c +#define time __TIME__ +int test; +EOF + $CCACHE $COMPILER -c time.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + $CCACHE $COMPILER -c time.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 1 + checkstat 'cache miss' 1 + + testname="__TIME__ in include time" + $CCACHE -Cz >/dev/null + cat <time.h +#define time __TIME__ +int test; +EOF + backdate time.h + cat <time_h.c +#include "time.h" +EOF + $CCACHE $COMPILER -c time_h.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 0 + checkstat 'cache miss' 1 + $CCACHE $COMPILER -c time_h.c + checkstat 'cache hit (direct)' 0 + checkstat 'cache hit (preprocessed)' 1 + checkstat 'cache miss' 1 ################################################################## # Reset things. @@ -919,9 +966,6 @@ EOF checkstat 'cache miss' 1 cd .. - CCACHE_NODIRECT=1 - export CCACHE_NODIRECT - ################################################################## # CCACHE_BASEDIR="" is the default. testname="default CCACHE_BASEDIR" @@ -939,9 +983,7 @@ EOF testname="path normalization" cd dir1 $CCACHE -z >/dev/null - unset CCACHE_NODIRECT - CCACHE_BASEDIR=$PWD $CCACHE $COMPILER -I$PWD//include -c $PWD//.///src/test.c - export CCACHE_NODIRECT=1 + CCACHE_BASEDIR=$PWD $CCACHE $COMPILER -I$PWD//include -c $PWD//src/test.c checkstat 'cache hit (direct)' 1 checkstat 'cache hit (preprocessed)' 0 checkstat 'cache miss' 0 @@ -960,12 +1002,14 @@ EOF fi CCACHE_BASEDIR=$PWD $CCACHE $COMPILER -Wall -W -I$PWD -c $PWD/stderr.c -o $PWD/stderr.o 2>stderr.txt - checkstat 'cache hit (direct)' 0 - checkstat 'cache hit (preprocessed)' 1 + checkstat 'cache hit (direct)' 1 + checkstat 'cache hit (preprocessed)' 0 checkstat 'cache miss' 1 if grep -q $PWD stderr.txt; then test_failed "Base dir ($PWD) found in stderr:\n`cat stderr.txt`" fi + + export CCACHE_NODIRECT=1 } compression_suite() {