From: Justin Lebar Date: Tue, 25 Dec 2012 04:09:38 +0000 (-0500) Subject: Allow mtime- and ctime-only matches. X-Git-Tag: v3.2~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b291647878eb3eaa331cb34cab361788231de074;p=thirdparty%2Fccache.git Allow mtime- and ctime-only matches. If CCACHE_SLOPPINESS includes "file_stat_matches", ccache will consider two files which match in name, size, mtime, and ctime to have the same contents. This allows ccache to avoid reading the file, thus speeding up cache hits. --- diff --git a/MANUAL.txt b/MANUAL.txt index e7411b407..c5072abb8 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -429,10 +429,14 @@ WRAPPERS>>. By default, ccache will not cache a file if it includes a header whose mtime is too new. This option disables that check. *include_file_ctime*:: - ccache also will not cache a file if it includes a header whose ctime is - too new. This option disables that check. + By default, ccache also will not cache a file if it includes a header whose + ctime is too new. This option disables that check. *time_macros*:: Ignore *\_\_DATE\__* and *\_\_TIME__* being present in the source code. +*file_stat_matches*:: + ccache normally examines a file's contents to determine whether it matches + the cached version. But with this option set, ccache will consider a file + as matching its cached version if the sizes, mtimes, and ctimes match. -- + See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more diff --git a/ccache.h b/ccache.h index f57a901bd..00aec468a 100644 --- a/ccache.h +++ b/ccache.h @@ -58,6 +58,12 @@ enum stats { #define SLOPPY_FILE_MACRO 4 #define SLOPPY_TIME_MACROS 8 +/* + * Allow us to match files based on their stats (size, mtime, ctime), without + * looking at their contents. 
+ */ +#define SLOPPY_FILE_STAT_MATCHES 16 + #define str_eq(s1, s2) (strcmp((s1), (s2)) == 0) #define str_startswith(s, p) (strncmp((s), (p), strlen((p))) == 0) @@ -260,4 +266,6 @@ int win32execute(char *path, char **argv, int doreturn, # define PATH_DELIM ":" #endif +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + #endif /* ifndef CCACHE_H */ diff --git a/conf.c b/conf.c index a9d261dc6..1b1b2b7c7 100644 --- a/conf.c +++ b/conf.c @@ -96,6 +96,8 @@ parse_sloppiness(const char *str, void *result, char **errmsg) *value |= SLOPPY_INCLUDE_FILE_CTIME; } else if (str_eq(word, "time_macros")) { *value |= SLOPPY_TIME_MACROS; + } else if (str_eq(word, "file_stat_matches")) { + *value |= SLOPPY_FILE_STAT_MATCHES; } else { *errmsg = format("unknown sloppiness: \"%s\"", word); free(p); @@ -592,6 +594,9 @@ conf_print_items(struct conf *conf, if (conf->sloppiness & SLOPPY_TIME_MACROS) { reformat(&s, "%stime_macros, ", s); } + if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) { + reformat(&s, "%sfile_stat_matches, ", s); + } if (conf->sloppiness) { /* Strip last ", ". */ s[strlen(s) - 2] = '\0'; diff --git a/manifest.c b/manifest.c index 5da269ae1..59153b88a 100644 --- a/manifest.c +++ b/manifest.c @@ -41,10 +41,14 @@ * index of include file path (4 bytes unsigned int) * hash of include file ( bytes) * size of include file (4 bytes unsigned int) + * mtime of include file (8 bytes signed int) + * ctime of include file (8 bytes signed int) * ... + * * * - * + * + * * ---------------------------------------------------------------------------- * number of object name entries (4 bytes unsigned int) * number of include file hash indexes (4 bytes unsigned int) @@ -63,7 +67,7 @@ */ static const uint32_t MAGIC = 0x63436d46U; -static const uint8_t VERSION = 0; +static const uint8_t VERSION = 1; static const uint32_t MAX_MANIFEST_ENTRIES = 100; #define ccache_static_assert(e) \ @@ -76,6 +80,10 @@ struct file_info { uint8_t hash[16]; /* Size of referenced file. 
*/ uint32_t size; + /* mtime of referenced file. */ + int64_t mtime; + /* ctime of referenced file. */ + int64_t ctime; }; struct object { @@ -110,10 +118,16 @@ struct manifest { struct object *objects; }; +struct file_stats { + uint32_t size; + int64_t mtime; + int64_t ctime; +}; + static unsigned int hash_from_file_info(void *key) { - ccache_static_assert(sizeof(struct file_info) == 24); /* No padding. */ + ccache_static_assert(sizeof(struct file_info) == 40); /* No padding. */ return murmurhashneutral2(key, sizeof(struct file_info), 0); } @@ -124,7 +138,9 @@ file_infos_equal(void *key1, void *key2) struct file_info *fi2 = (struct file_info *)key2; return fi1->index == fi2->index && memcmp(fi1->hash, fi2->hash, 16) == 0 - && fi1->size == fi2->size; + && fi1->size == fi2->size + && fi1->mtime == fi2->mtime + && fi1->ctime == fi2->ctime; } static void @@ -263,6 +279,8 @@ read_manifest(gzFile f) READ_INT(4, mf->file_infos[i].index); READ_BYTES(mf->hash_size, mf->file_infos[i].hash); READ_INT(4, mf->file_infos[i].size); + READ_INT(8, mf->file_infos[i].mtime); + READ_INT(8, mf->file_infos[i].ctime); } READ_INT(4, mf->n_objects); @@ -336,6 +354,8 @@ write_manifest(gzFile f, const struct manifest *mf) WRITE_INT(4, mf->file_infos[i].index); WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash); WRITE_INT(4, mf->file_infos[i].size); + WRITE_INT(8, mf->file_infos[i].mtime); + WRITE_INT(8, mf->file_infos[i].ctime); } WRITE_INT(4, mf->n_objects); @@ -357,23 +377,59 @@ error: static int verify_object(struct conf *conf, struct manifest *mf, struct object *obj, - struct hashtable *hashed_files) + struct hashtable *stated_files, struct hashtable *hashed_files) { uint32_t i; struct file_info *fi; struct file_hash *actual; + struct file_stats *st; struct mdfour hash; int result; + char *path; for (i = 0; i < obj->n_file_info_indexes; i++) { fi = &mf->file_infos[obj->file_info_indexes[i]]; - actual = hashtable_search(hashed_files, mf->files[fi->index]); + path = mf->files[fi->index]; 
+ st = hashtable_search(stated_files, path); + if (!st) { + struct stat file_stat; + if (stat(path, &file_stat) == -1) { + cc_log("Failed to stat include file %s: %s", path, strerror(errno)); + return 0; + } + st = x_malloc(sizeof(*st)); + st->size = file_stat.st_size; + st->mtime = file_stat.st_mtime; + st->ctime = file_stat.st_ctime; + hashtable_insert(stated_files, x_strdup(path), st); + } + + if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) { + /* + * st->ctime is sometimes 0, so we can't check that both + * st->ctime and st->mtime are greater than + * time_of_compilation. But it's sufficient to check that + * either is. + */ + if (fi->size == st->size + && fi->mtime == st->mtime + && fi->ctime == st->ctime + && MAX(st->mtime, st->ctime) >= time_of_compilation) { + cc_log("size/mtime/ctime hit for %s.", path); + continue; + } + else { + cc_log("size/mtime/ctime miss for %s.", path); + } + } + + actual = hashtable_search(hashed_files, path); if (!actual) { actual = x_malloc(sizeof(*actual)); hash_start(&hash); - result = hash_source_code_file(conf, &hash, mf->files[fi->index]); + result = hash_source_code_file(conf, &hash, path); if (result & HASH_SOURCE_CODE_ERROR) { - cc_log("Failed hashing %s", mf->files[fi->index]); + cc_log("Failed hashing %s", path); free(actual); return 0; } @@ -383,7 +439,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj, } hash_result_as_bytes(&hash, actual->hash); actual->size = hash.totalN; - hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual); + hashtable_insert(hashed_files, x_strdup(path), actual); } if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0 || fi->size != actual->size) { @@ -459,11 +515,31 @@ get_file_hash_index(struct manifest *mf, struct file_info fi; uint32_t *fi_index; uint32_t n; + struct stat file_stat; fi.index = get_include_file_index(mf, path, mf_files); memcpy(fi.hash, file_hash->hash, sizeof(fi.hash)); fi.size = file_hash->size; + /* + * file_stat.st_{m,c}time 
has a resolution of 1s, so we can cache the + * file's mtime and ctime only if they're at least one second older + * than time_of_compilation. + * + * st->ctime may be 0, so we have to check time_of_compilation against + * MAX(mtime, ctime). + */ + + if (stat(path, &file_stat) != -1 + && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) { + fi.mtime = file_stat.st_mtime; + fi.ctime = file_stat.st_ctime; + } + else { + fi.mtime = -1; + fi.ctime = -1; + } + fi_index = hashtable_search(mf_file_infos, &fi); if (fi_index) { return *fi_index; @@ -541,6 +617,7 @@ manifest_get(struct conf *conf, const char *manifest_path) gzFile f = NULL; struct manifest *mf = NULL; struct hashtable *hashed_files = NULL; /* path --> struct file_hash */ + struct hashtable *stated_files = NULL; /* path --> struct file_stats */ uint32_t i; struct file_hash *fh = NULL; @@ -563,10 +640,12 @@ manifest_get(struct conf *conf, const char *manifest_path) } hashed_files = create_hashtable(1000, hash_from_string, strings_equal); + stated_files = create_hashtable(1000, hash_from_string, strings_equal); /* Check newest object first since it's a bit more likely to match. 
*/ for (i = mf->n_objects; i > 0; i--) { - if (verify_object(conf, mf, &mf->objects[i - 1], hashed_files)) { + if (verify_object(conf, mf, &mf->objects[i - 1], + stated_files, hashed_files)) { fh = x_malloc(sizeof(*fh)); *fh = mf->objects[i - 1].hash; goto out; @@ -577,6 +656,9 @@ out: if (hashed_files) { hashtable_destroy(hashed_files, 1); } + if (stated_files) { + hashtable_destroy(stated_files, 1); + } if (f) { gzclose(f); } @@ -733,6 +815,8 @@ manifest_dump(const char *manifest_path, FILE *stream) fprintf(stream, " Hash: %s\n", hash); free(hash); fprintf(stream, " Size: %u\n", mf->file_infos[i].size); + fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime); + fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime); } fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects); for (i = 0; i < mf->n_objects; ++i) { diff --git a/test.sh b/test.sh index d2af7e4ca..08eb08ab4 100755 --- a/test.sh +++ b/test.sh @@ -1318,12 +1318,12 @@ EOF $CCACHE $COMPILER test.c -c -o test.o manifest=`find $CCACHE_DIR -name '*.manifest'` $CCACHE --dump-manifest $manifest | - perl -ape 's/:.*/: normalized/ if ($F[0] =~ "Mtime:") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \ + perl -ape 's/:.*/: normalized/ if ($F[0] =~ "(Mtime|Ctime):") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \ >manifest.dump if [ $COMPILER_TYPE_CLANG -eq 1 ]; then cat <expected.dump Magic: cCmF -Version: 0 +Version: 1 Hash size: 16 Reserved field: 0 File paths (3): @@ -1335,14 +1335,20 @@ File infos (3): Path index: 0 Hash: c2f5392dbc7e8ff6138d01608445240a Size: 24 + Mtime: normalized + Ctime: normalized 1: Path index: 1 Hash: e6b009695d072974f2c4d1dd7e7ed4fc Size: 95 + Mtime: normalized + Ctime: normalized 2: Path index: 2 Hash: e94ceb9f1b196c387d098a5f1f4fe862 Size: 11 + Mtime: normalized + Ctime: normalized Results (1): 0: File hash indexes: 0 1 2 @@ -1352,7 +1358,7 @@ EOF else cat <expected.dump Magic: cCmF -Version: 0 +Version: 1 Hash size: 16 Reserved field: 0 File paths (3): @@ 
-1364,14 +1370,20 @@ File infos (3): Path index: 0 Hash: e94ceb9f1b196c387d098a5f1f4fe862 Size: 11 + Mtime: normalized + Ctime: normalized 1: Path index: 1 Hash: c2f5392dbc7e8ff6138d01608445240a Size: 24 + Mtime: normalized + Ctime: normalized 2: Path index: 2 Hash: e6b009695d072974f2c4d1dd7e7ed4fc Size: 95 + Mtime: normalized + Ctime: normalized Results (1): 0: File hash indexes: 0 1 2 diff --git a/test/test_conf.c b/test/test_conf.c index 048264fc9..b06e477ac 100644 --- a/test/test_conf.c +++ b/test/test_conf.c @@ -144,7 +144,8 @@ TEST(conf_read_valid_config) CHECK(conf->recache); CHECK(conf->run_second_cpp); CHECK_INT_EQ(SLOPPY_INCLUDE_FILE_MTIME|SLOPPY_INCLUDE_FILE_CTIME| - SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS, + SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS| + SLOPPY_FILE_STAT_MATCHES, conf->sloppiness); CHECK(!conf->stats); CHECK_STR_EQ_FREE1(format("%s_foo", user), conf->temporary_dir); @@ -362,6 +363,8 @@ TEST(conf_print_items) true, SLOPPY_FILE_MACRO|SLOPPY_INCLUDE_FILE_MTIME| SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS, + SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS| + SLOPPY_FILE_STAT_MATCHES, false, "td", 022,