git.ipfire.org Git - thirdparty/ccache.git/commitdiff
Allow mtime- and ctime-only matches.
author Justin Lebar <justin.lebar@gmail.com>
Tue, 25 Dec 2012 04:09:38 +0000 (23:09 -0500)
committer Joel Rosdahl <joel@rosdahl.net>
Sat, 2 Mar 2013 19:45:58 +0000 (20:45 +0100)
If CCACHE_SLOPPINESS includes "file_stat_matches", ccache will consider
two files which match in name, size, mtime, and ctime to have the same
contents.  This allows ccache to avoid reading the file, thus speeding
up cache hits.

MANUAL.txt
ccache.h
conf.c
manifest.c
test.sh
test/test_conf.c

index e7411b407d9024dfeed92811244d8a5388fd9670..c5072abb89189d8f809f0310313858145928c77d 100644 (file)
@@ -429,10 +429,14 @@ WRAPPERS>>.
     By default, ccache will not cache a file if it includes a header whose
     mtime is too new.  This option disables that check.
 *include_file_ctime*::
-    ccache also will not cache a file if it includes a header whose ctime is
-    too new.  This option disables that check.
+    By default, ccache also will not cache a file if it includes a header whose
+    ctime is too new.  This option disables that check.
 *time_macros*::
     Ignore *\_\_DATE\__* and *\_\_TIME__* being present in the source code.
+*file_stat_matches*::
+    ccache normally examines a file's contents to determine whether it matches
+    the cached version.  But with this option set, ccache will consider a file
+    as matching its cached version if the sizes, mtimes, and ctimes match.
 --
 +
 See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more
index f57a901bd9b611cf640fd5eff8d7896d260185c6..00aec468a154b44087d6c066aff75b2cf343079a 100644 (file)
--- a/ccache.h
+++ b/ccache.h
@@ -58,6 +58,12 @@ enum stats {
 #define SLOPPY_FILE_MACRO 4
 #define SLOPPY_TIME_MACROS 8
 
+/*
+ * Allow us to match files based on their stats (size, mtime, ctime), without
+ * looking at their contents.
+ */
+#define SLOPPY_FILE_STAT_MATCHES 16
+
 #define str_eq(s1, s2) (strcmp((s1), (s2)) == 0)
 #define str_startswith(s, p) (strncmp((s), (p), strlen((p))) == 0)
 
@@ -260,4 +266,6 @@ int win32execute(char *path, char **argv, int doreturn,
 #    define PATH_DELIM ":"
 #endif
 
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
 #endif /* ifndef CCACHE_H */
diff --git a/conf.c b/conf.c
index a9d261dc670860d7d0d06d9395640cf674e954f2..1b1b2b7c70fd046ebf8097cd88f3501cee4c0420 100644 (file)
--- a/conf.c
+++ b/conf.c
@@ -96,6 +96,8 @@ parse_sloppiness(const char *str, void *result, char **errmsg)
                        *value |= SLOPPY_INCLUDE_FILE_CTIME;
                } else if (str_eq(word, "time_macros")) {
                        *value |= SLOPPY_TIME_MACROS;
+               } else if (str_eq(word, "file_stat_matches")) {
+                       *value |= SLOPPY_FILE_STAT_MATCHES;
                } else {
                        *errmsg = format("unknown sloppiness: \"%s\"", word);
                        free(p);
@@ -592,6 +594,9 @@ conf_print_items(struct conf *conf,
        if (conf->sloppiness & SLOPPY_TIME_MACROS) {
                reformat(&s, "%stime_macros, ", s);
        }
+       if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
+               reformat(&s, "%sfile_stat_matches, ", s);
+       }
        if (conf->sloppiness) {
                /* Strip last ", ". */
                s[strlen(s) - 2] = '\0';
index 5da269ae1cfaa07a11f19037a811a5c460a60558..59153b88ae079bdf58b1256eecf94b2efd586c25 100644 (file)
  * <index[0]>      index of include file path          (4 bytes unsigned int)
  * <hash[0]>       hash of include file                (<hash_size> bytes)
  * <size[0]>       size of include file                (4 bytes unsigned int)
+ * <mtime[0]>      mtime of include file               (8 bytes signed int)
+ * <ctime[0]>      ctime of include file               (8 bytes signed int)
  * ...
+ * <index[n-1]>
  * <hash[n-1]>
  * <size[n-1]>
- * <index[n-1]>
+ * <mtime[n-1]>
+ * <ctime[n-1]>
  * ----------------------------------------------------------------------------
  * <n>             number of object name entries       (4 bytes unsigned int)
  * <m[0]>          number of include file hash indexes (4 bytes unsigned int)
@@ -63,7 +67,7 @@
  */
 
 static const uint32_t MAGIC = 0x63436d46U;
-static const uint8_t  VERSION = 0;
+static const uint8_t  VERSION = 1;
 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
 
 #define ccache_static_assert(e) \
@@ -76,6 +80,10 @@ struct file_info {
        uint8_t hash[16];
        /* Size of referenced file. */
        uint32_t size;
+       /* mtime of referenced file. */
+       int64_t mtime;
+       /* ctime of referenced file. */
+       int64_t ctime;
 };
 
 struct object {
@@ -110,10 +118,16 @@ struct manifest {
        struct object *objects;
 };
 
+struct file_stats {
+       uint32_t size;
+       int64_t mtime;
+       int64_t ctime;
+};
+
 static unsigned int
 hash_from_file_info(void *key)
 {
-       ccache_static_assert(sizeof(struct file_info) == 24); /* No padding. */
+       ccache_static_assert(sizeof(struct file_info) == 40); /* No padding. */
        return murmurhashneutral2(key, sizeof(struct file_info), 0);
 }
 
@@ -124,7 +138,9 @@ file_infos_equal(void *key1, void *key2)
        struct file_info *fi2 = (struct file_info *)key2;
        return fi1->index == fi2->index
               && memcmp(fi1->hash, fi2->hash, 16) == 0
-              && fi1->size == fi2->size;
+              && fi1->size == fi2->size
+              && fi1->mtime == fi2->mtime
+              && fi1->ctime == fi2->ctime;
 }
 
 static void
@@ -263,6 +279,8 @@ read_manifest(gzFile f)
                READ_INT(4, mf->file_infos[i].index);
                READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
                READ_INT(4, mf->file_infos[i].size);
+               READ_INT(8, mf->file_infos[i].mtime);
+               READ_INT(8, mf->file_infos[i].ctime);
        }
 
        READ_INT(4, mf->n_objects);
@@ -336,6 +354,8 @@ write_manifest(gzFile f, const struct manifest *mf)
                WRITE_INT(4, mf->file_infos[i].index);
                WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
                WRITE_INT(4, mf->file_infos[i].size);
+               WRITE_INT(8, mf->file_infos[i].mtime);
+               WRITE_INT(8, mf->file_infos[i].ctime);
        }
 
        WRITE_INT(4, mf->n_objects);
@@ -357,23 +377,59 @@ error:
 
 static int
 verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
-              struct hashtable *hashed_files)
+             struct hashtable *stated_files, struct hashtable *hashed_files)
 {
        uint32_t i;
        struct file_info *fi;
        struct file_hash *actual;
+       struct file_stats *st;
        struct mdfour hash;
        int result;
+       char *path;
 
        for (i = 0; i < obj->n_file_info_indexes; i++) {
                fi = &mf->file_infos[obj->file_info_indexes[i]];
-               actual = hashtable_search(hashed_files, mf->files[fi->index]);
+               path = mf->files[fi->index];
+               st = hashtable_search(hashed_files, path);
+               if (!st) {
+                       struct stat file_stat;
+                       if (stat(path, &file_stat) == -1) {
+                               cc_log("Failed to stat include file %s: %s", path, strerror(errno));
+                               return 0;
+                       }
+                       st = x_malloc(sizeof(*st));
+                       st->size = file_stat.st_size;
+                       st->mtime = file_stat.st_mtime;
+                       st->ctime = file_stat.st_ctime;
+                       hashtable_insert(stated_files, x_strdup(path), st);
+               }
+
+               if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
+                       /*
+                        * st->ctime is sometimes 0, so we can't check that both
+                        * st->ctime and st->mtime are greater than
+                        * time_of_compilation.  But it's sufficient to check that
+                        * either is.
+                        */
+                       if (fi->size == st->size
+                           && fi->mtime == st->mtime
+                           && fi->ctime == st->ctime
+                           && MAX(st->mtime, st->ctime) >= time_of_compilation) {
+                               cc_log("size/mtime/ctime hit for %s.", path);
+                               continue;
+                       }
+                       else {
+                               cc_log("size/mtime/ctime miss for %s.", path);
+                       }
+               }
+
+               actual = hashtable_search(hashed_files, path);
                if (!actual) {
                        actual = x_malloc(sizeof(*actual));
                        hash_start(&hash);
-                       result = hash_source_code_file(conf, &hash, mf->files[fi->index]);
+                       result = hash_source_code_file(conf, &hash, path);
                        if (result & HASH_SOURCE_CODE_ERROR) {
-                               cc_log("Failed hashing %s", mf->files[fi->index]);
+                               cc_log("Failed hashing %s", path);
                                free(actual);
                                return 0;
                        }
@@ -383,7 +439,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
                        }
                        hash_result_as_bytes(&hash, actual->hash);
                        actual->size = hash.totalN;
-                       hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual);
+                       hashtable_insert(hashed_files, x_strdup(path), actual);
                }
                if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
                    || fi->size != actual->size) {
@@ -459,11 +515,31 @@ get_file_hash_index(struct manifest *mf,
        struct file_info fi;
        uint32_t *fi_index;
        uint32_t n;
+       struct stat file_stat;
 
        fi.index = get_include_file_index(mf, path, mf_files);
        memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
        fi.size = file_hash->size;
 
+       /*
+        * file_stat.st_{m,c}time has a resolution of 1s, so we can cache the
+        * file's mtime and ctime only if they're at least one second older
+        * than time_of_compilation.
+        *
+        * st->ctime may be 0, so we have to check time_of_compilation against
+        * MAX(mtime, ctime).
+        */
+
+       if (stat(path, &file_stat) != -1
+           && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
+               fi.mtime = file_stat.st_mtime;
+               fi.ctime = file_stat.st_ctime;
+       }
+       else {
+               fi.mtime = -1;
+               fi.ctime = -1;
+       }
+
        fi_index = hashtable_search(mf_file_infos, &fi);
        if (fi_index) {
                return *fi_index;
@@ -541,6 +617,7 @@ manifest_get(struct conf *conf, const char *manifest_path)
        gzFile f = NULL;
        struct manifest *mf = NULL;
        struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
+       struct hashtable *stated_files = NULL; /* path --> struct file_stats */
        uint32_t i;
        struct file_hash *fh = NULL;
 
@@ -563,10 +640,12 @@ manifest_get(struct conf *conf, const char *manifest_path)
        }
 
        hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
+       stated_files = create_hashtable(1000, hash_from_string, strings_equal);
 
        /* Check newest object first since it's a bit more likely to match. */
        for (i = mf->n_objects; i > 0; i--) {
-               if (verify_object(conf, mf, &mf->objects[i - 1], hashed_files)) {
+               if (verify_object(conf, mf, &mf->objects[i - 1],
+                                 stated_files, hashed_files)) {
                        fh = x_malloc(sizeof(*fh));
                        *fh = mf->objects[i - 1].hash;
                        goto out;
@@ -577,6 +656,9 @@ out:
        if (hashed_files) {
                hashtable_destroy(hashed_files, 1);
        }
+       if (stated_files) {
+               hashtable_destroy(stated_files, 1);
+       }
        if (f) {
                gzclose(f);
        }
@@ -733,6 +815,8 @@ manifest_dump(const char *manifest_path, FILE *stream)
                fprintf(stream, "    Hash: %s\n", hash);
                free(hash);
                fprintf(stream, "    Size: %u\n", mf->file_infos[i].size);
+               fprintf(stream, "    Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
+               fprintf(stream, "    Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
        }
        fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
        for (i = 0; i < mf->n_objects; ++i) {
diff --git a/test.sh b/test.sh
index d2af7e4ca448705414385f96bc538ec4b23dafc0..08eb08ab4fee90665c6fddc6fad759d98448f5c2 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -1318,12 +1318,12 @@ EOF
     $CCACHE $COMPILER test.c -c -o test.o
     manifest=`find $CCACHE_DIR -name '*.manifest'`
     $CCACHE --dump-manifest $manifest |
-        perl -ape 's/:.*/: normalized/ if ($F[0] =~ "Mtime:") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \
+        perl -ape 's/:.*/: normalized/ if ($F[0] =~ "(Mtime|Ctime):") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \
         >manifest.dump
     if [ $COMPILER_TYPE_CLANG -eq 1 ]; then
         cat <<EOF >expected.dump
 Magic: cCmF
-Version: 0
+Version: 1
 Hash size: 16
 Reserved field: 0
 File paths (3):
@@ -1335,14 +1335,20 @@ File infos (3):
     Path index: 0
     Hash: c2f5392dbc7e8ff6138d01608445240a
     Size: 24
+    Mtime: normalized
+    Ctime: normalized
   1:
     Path index: 1
     Hash: e6b009695d072974f2c4d1dd7e7ed4fc
     Size: 95
+    Mtime: normalized
+    Ctime: normalized
   2:
     Path index: 2
     Hash: e94ceb9f1b196c387d098a5f1f4fe862
     Size: 11
+    Mtime: normalized
+    Ctime: normalized
 Results (1):
   0:
     File hash indexes: 0 1 2
@@ -1352,7 +1358,7 @@ EOF
     else
         cat <<EOF >expected.dump
 Magic: cCmF
-Version: 0
+Version: 1
 Hash size: 16
 Reserved field: 0
 File paths (3):
@@ -1364,14 +1370,20 @@ File infos (3):
     Path index: 0
     Hash: e94ceb9f1b196c387d098a5f1f4fe862
     Size: 11
+    Mtime: normalized
+    Ctime: normalized
   1:
     Path index: 1
     Hash: c2f5392dbc7e8ff6138d01608445240a
     Size: 24
+    Mtime: normalized
+    Ctime: normalized
   2:
     Path index: 2
     Hash: e6b009695d072974f2c4d1dd7e7ed4fc
     Size: 95
+    Mtime: normalized
+    Ctime: normalized
 Results (1):
   0:
     File hash indexes: 0 1 2
index 048264fc91dd2ca1eb83c8e429e4771e36b52d4f..b06e477ac74109c29b4e1a87e50311866d919f0b 100644 (file)
@@ -144,7 +144,8 @@ TEST(conf_read_valid_config)
        CHECK(conf->recache);
        CHECK(conf->run_second_cpp);
        CHECK_INT_EQ(SLOPPY_INCLUDE_FILE_MTIME|SLOPPY_INCLUDE_FILE_CTIME|
-                    SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS,
+                    SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS|
+                    SLOPPY_FILE_STAT_MATCHES,
                     conf->sloppiness);
        CHECK(!conf->stats);
        CHECK_STR_EQ_FREE1(format("%s_foo", user), conf->temporary_dir);
@@ -362,6 +363,8 @@ TEST(conf_print_items)
                true,
                SLOPPY_FILE_MACRO|SLOPPY_INCLUDE_FILE_MTIME|
                  SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS,
+                 SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS|
+                 SLOPPY_FILE_STAT_MATCHES,
                false,
                "td",
                022,