Allow mtime- and ctime-only matches.

author Justin Lebar <justin.lebar@gmail.com>

Tue, 25 Dec 2012 04:09:38 +0000 (23:09 -0500)

committer Joel Rosdahl <joel@rosdahl.net>

Sat, 2 Mar 2013 19:45:58 +0000 (20:45 +0100)
author Justin Lebar <justin.lebar@gmail.com>
Tue, 25 Dec 2012 04:09:38 +0000 (23:09 -0500)
committer Joel Rosdahl <joel@rosdahl.net>
Sat, 2 Mar 2013 19:45:58 +0000 (20:45 +0100)
diff --git a/MANUAL.txt b/MANUAL.txt

index e7411b407d9024dfeed92811244d8a5388fd9670..c5072abb89189d8f809f0310313858145928c77d 100644 (file)
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -429,10 +429,14 @@ WRAPPERS>>.
      By default, ccache will not cache a file if it includes a header whose
      mtime is too new.  This option disables that check.
  *include_file_ctime*::
-    ccache also will not cache a file if it includes a header whose ctime is
-    too new.  This option disables that check.
+    By default, ccache also will not cache a file if it includes a header whose
+    ctime is too new.  This option disables that check.
  *time_macros*::
      Ignore *\_\_DATE\__* and *\_\_TIME__* being present in the source code.
+*file_stat_matches*::
+    ccache normally examines a file's contents to determine whether it matches
+    the cached version.  With this option set, ccache will consider a file as
+    matching its cached version if their sizes, mtimes and ctimes match.
  --
  +
  See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more
diff --git a/ccache.h b/ccache.h

index f57a901bd9b611cf640fd5eff8d7896d260185c6..00aec468a154b44087d6c066aff75b2cf343079a 100644 (file)
--- a/ccache.h
+++ b/ccache.h
@@ -58,6 +58,12 @@ enum stats {
  #define SLOPPY_FILE_MACRO 4
  #define SLOPPY_TIME_MACROS 8
  
+/*
+ * Allow us to match files based on their stats (size, mtime, ctime), without
+ * looking at their contents.
+ */
+#define SLOPPY_FILE_STAT_MATCHES 16
+
  #define str_eq(s1, s2) (strcmp((s1), (s2)) == 0)
  #define str_startswith(s, p) (strncmp((s), (p), strlen((p))) == 0)
  
@@ -260,4 +266,6 @@ int win32execute(char *path, char **argv, int doreturn,
  #    define PATH_DELIM ":"
  #endif
  
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
  #endif /* ifndef CCACHE_H */
diff --git a/conf.c b/conf.c

index a9d261dc670860d7d0d06d9395640cf674e954f2..1b1b2b7c70fd046ebf8097cd88f3501cee4c0420 100644 (file)
--- a/conf.c
+++ b/conf.c
@@ -96,6 +96,8 @@ parse_sloppiness(const char *str, void *result, char **errmsg)
                         *value |= SLOPPY_INCLUDE_FILE_CTIME;
                 } else if (str_eq(word, "time_macros")) {
                         *value |= SLOPPY_TIME_MACROS;
+               } else if (str_eq(word, "file_stat_matches")) {
+                       *value |= SLOPPY_FILE_STAT_MATCHES;
                 } else {
                         *errmsg = format("unknown sloppiness: \"%s\"", word);
                         free(p);
@@ -592,6 +594,9 @@ conf_print_items(struct conf *conf,
         if (conf->sloppiness & SLOPPY_TIME_MACROS) {
                 reformat(&s, "%stime_macros, ", s);
         }
+       if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
+               reformat(&s, "%sfile_stat_matches, ", s);
+       }
         if (conf->sloppiness) {
                 /* Strip last ", ". */
                 s[strlen(s) - 2] = '\0';
diff --git a/manifest.c b/manifest.c

index 5da269ae1cfaa07a11f19037a811a5c460a60558..59153b88ae079bdf58b1256eecf94b2efd586c25 100644 (file)
--- a/manifest.c
+++ b/manifest.c
@@ -41,10 +41,14 @@
   * <index[0]>      index of include file path          (4 bytes unsigned int)
   * <hash[0]>       hash of include file                (<hash_size> bytes)
   * <size[0]>       size of include file                (4 bytes unsigned int)
+ * <mtime[0]>      mtime of include file               (8 bytes signed int)
+ * <ctime[0]>      ctime of include file               (8 bytes signed int)
   * ...
+ * <index[n-1]>
   * <hash[n-1]>
   * <size[n-1]>
- * <index[n-1]>
+ * <mtime[n-1]>
+ * <ctime[n-1]>
   * ----------------------------------------------------------------------------
   * <n>             number of object name entries       (4 bytes unsigned int)
   * <m[0]>          number of include file hash indexes (4 bytes unsigned int)
@@ -63,7 +67,7 @@
   */
  
  static const uint32_t MAGIC = 0x63436d46U;
-static const uint8_t  VERSION = 0;
+static const uint8_t  VERSION = 1;
  static const uint32_t MAX_MANIFEST_ENTRIES = 100;
  
  #define ccache_static_assert(e) \
@@ -76,6 +80,10 @@ struct file_info {
         uint8_t hash[16];
         /* Size of referenced file. */
         uint32_t size;
+       /* mtime of referenced file. */
+       int64_t mtime;
+       /* ctime of referenced file. */
+       int64_t ctime;
  };
  
  struct object {
@@ -110,10 +118,16 @@ struct manifest {
         struct object *objects;
  };
  
+struct file_stats {
+       uint32_t size;
+       int64_t mtime;
+       int64_t ctime;
+};
+
  static unsigned int
  hash_from_file_info(void *key)
  {
-       ccache_static_assert(sizeof(struct file_info) == 24); /* No padding. */
+       ccache_static_assert(sizeof(struct file_info) == 40); /* No padding. */
         return murmurhashneutral2(key, sizeof(struct file_info), 0);
  }
  
@@ -124,7 +138,9 @@ file_infos_equal(void *key1, void *key2)
         struct file_info *fi2 = (struct file_info *)key2;
         return fi1->index == fi2->index
                && memcmp(fi1->hash, fi2->hash, 16) == 0
-              && fi1->size == fi2->size;
+              && fi1->size == fi2->size
+              && fi1->mtime == fi2->mtime
+              && fi1->ctime == fi2->ctime;
  }
  
  static void
@@ -263,6 +279,8 @@ read_manifest(gzFile f)
                 READ_INT(4, mf->file_infos[i].index);
                 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
                 READ_INT(4, mf->file_infos[i].size);
+               READ_INT(8, mf->file_infos[i].mtime);
+               READ_INT(8, mf->file_infos[i].ctime);
         }
  
         READ_INT(4, mf->n_objects);
@@ -336,6 +354,8 @@ write_manifest(gzFile f, const struct manifest *mf)
                 WRITE_INT(4, mf->file_infos[i].index);
                 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
                 WRITE_INT(4, mf->file_infos[i].size);
+               WRITE_INT(8, mf->file_infos[i].mtime);
+               WRITE_INT(8, mf->file_infos[i].ctime);
         }
  
         WRITE_INT(4, mf->n_objects);
@@ -357,23 +377,59 @@ error:
  
  static int
  verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
-              struct hashtable *hashed_files)
+             struct hashtable *stated_files, struct hashtable *hashed_files)
  {
         uint32_t i;
         struct file_info *fi;
         struct file_hash *actual;
+       struct file_stats *st;
         struct mdfour hash;
         int result;
+       char *path;
  
         for (i = 0; i < obj->n_file_info_indexes; i++) {
                 fi = &mf->file_infos[obj->file_info_indexes[i]];
-               actual = hashtable_search(hashed_files, mf->files[fi->index]);
+               path = mf->files[fi->index];
+               st = hashtable_search(stated_files, path);
+               if (!st) {
+                       struct stat file_stat;
+                       if (stat(path, &file_stat) == -1) {
+                               cc_log("Failed to stat include file %s: %s", path, strerror(errno));
+                               return 0;
+                       }
+                       st = x_malloc(sizeof(*st));
+                       st->size = file_stat.st_size;
+                       st->mtime = file_stat.st_mtime;
+                       st->ctime = file_stat.st_ctime;
+                       hashtable_insert(stated_files, x_strdup(path), st);
+               }
+
+               if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
+                       /*
+                        * st->ctime is sometimes 0, so we can't check that both
+                        * st->ctime and st->mtime are greater than
+                        * time_of_compilation.  But it's sufficient to check that
+                        * either is.
+                        */
+                       if (fi->size == st->size
+                           && fi->mtime == st->mtime
+                           && fi->ctime == st->ctime
+                           && MAX(st->mtime, st->ctime) >= time_of_compilation) {
+                               cc_log("size/mtime/ctime hit for %s.", path);
+                               continue;
+                       }
+                       else {
+                               cc_log("size/mtime/ctime miss for %s.", path);
+                       }
+               }
+
+               actual = hashtable_search(hashed_files, path);
                 if (!actual) {
                         actual = x_malloc(sizeof(*actual));
                         hash_start(&hash);
-                       result = hash_source_code_file(conf, &hash, mf->files[fi->index]);
+                       result = hash_source_code_file(conf, &hash, path);
                         if (result & HASH_SOURCE_CODE_ERROR) {
-                               cc_log("Failed hashing %s", mf->files[fi->index]);
+                               cc_log("Failed hashing %s", path);
                                 free(actual);
                                 return 0;
                         }
@@ -383,7 +439,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
                         }
                         hash_result_as_bytes(&hash, actual->hash);
                         actual->size = hash.totalN;
-                       hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual);
+                       hashtable_insert(hashed_files, x_strdup(path), actual);
                 }
                 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
                     || fi->size != actual->size) {
@@ -459,11 +515,31 @@ get_file_hash_index(struct manifest *mf,
         struct file_info fi;
         uint32_t *fi_index;
         uint32_t n;
+       struct stat file_stat;
  
         fi.index = get_include_file_index(mf, path, mf_files);
         memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
         fi.size = file_hash->size;
  
+       /*
+        * file_stat.st_{m,c}time has a resolution of 1s, so we can cache the
+        * file's mtime and ctime only if they're at least one second older
+        * than time_of_compilation.
+        *
+        * file_stat.st_ctime may be 0, so we have to check time_of_compilation
+        * against MAX(st_mtime, st_ctime).
+        */
+
+       if (stat(path, &file_stat) != -1
+           && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
+               fi.mtime = file_stat.st_mtime;
+               fi.ctime = file_stat.st_ctime;
+       }
+       else {
+               fi.mtime = -1;
+               fi.ctime = -1;
+       }
+
         fi_index = hashtable_search(mf_file_infos, &fi);
         if (fi_index) {
                 return *fi_index;
@@ -541,6 +617,7 @@ manifest_get(struct conf *conf, const char *manifest_path)
         gzFile f = NULL;
         struct manifest *mf = NULL;
         struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
+       struct hashtable *stated_files = NULL; /* path --> struct file_stats */
         uint32_t i;
         struct file_hash *fh = NULL;
  
@@ -563,10 +640,12 @@ manifest_get(struct conf *conf, const char *manifest_path)
         }
  
         hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
+       stated_files = create_hashtable(1000, hash_from_string, strings_equal);
  
         /* Check newest object first since it's a bit more likely to match. */
         for (i = mf->n_objects; i > 0; i--) {
-               if (verify_object(conf, mf, &mf->objects[i - 1], hashed_files)) {
+               if (verify_object(conf, mf, &mf->objects[i - 1],
+                                 stated_files, hashed_files)) {
                         fh = x_malloc(sizeof(*fh));
                         *fh = mf->objects[i - 1].hash;
                         goto out;
@@ -577,6 +656,9 @@ out:
         if (hashed_files) {
                 hashtable_destroy(hashed_files, 1);
         }
+       if (stated_files) {
+               hashtable_destroy(stated_files, 1);
+       }
         if (f) {
                 gzclose(f);
         }
@@ -733,6 +815,8 @@ manifest_dump(const char *manifest_path, FILE *stream)
                 fprintf(stream, "    Hash: %s\n", hash);
                 free(hash);
                 fprintf(stream, "    Size: %u\n", mf->file_infos[i].size);
+               fprintf(stream, "    Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
+               fprintf(stream, "    Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
         }
         fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
         for (i = 0; i < mf->n_objects; ++i) {
diff --git a/test.sh b/test.sh

index d2af7e4ca448705414385f96bc538ec4b23dafc0..08eb08ab4fee90665c6fddc6fad759d98448f5c2 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -1318,12 +1318,12 @@ EOF
      $CCACHE $COMPILER test.c -c -o test.o
      manifest=`find $CCACHE_DIR -name '*.manifest'`
      $CCACHE --dump-manifest $manifest |
-        perl -ape 's/:.*/: normalized/ if ($F[0] =~ "Mtime:") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \
+        perl -ape 's/:.*/: normalized/ if ($F[0] =~ "(Mtime|Ctime):") or ($F[0] =~ "(Hash|Size):" and ++$n > 6)' \
          >manifest.dump
      if [ $COMPILER_TYPE_CLANG -eq 1 ]; then
          cat <<EOF >expected.dump
  Magic: cCmF
-Version: 0
+Version: 1
  Hash size: 16
  Reserved field: 0
  File paths (3):
@@ -1335,14 +1335,20 @@ File infos (3):
      Path index: 0
      Hash: c2f5392dbc7e8ff6138d01608445240a
      Size: 24
+    Mtime: normalized
+    Ctime: normalized
    1:
      Path index: 1
      Hash: e6b009695d072974f2c4d1dd7e7ed4fc
      Size: 95
+    Mtime: normalized
+    Ctime: normalized
    2:
      Path index: 2
      Hash: e94ceb9f1b196c387d098a5f1f4fe862
      Size: 11
+    Mtime: normalized
+    Ctime: normalized
  Results (1):
    0:
      File hash indexes: 0 1 2
@@ -1352,7 +1358,7 @@ EOF
      else
          cat <<EOF >expected.dump
  Magic: cCmF
-Version: 0
+Version: 1
  Hash size: 16
  Reserved field: 0
  File paths (3):
@@ -1364,14 +1370,20 @@ File infos (3):
      Path index: 0
      Hash: e94ceb9f1b196c387d098a5f1f4fe862
      Size: 11
+    Mtime: normalized
+    Ctime: normalized
    1:
      Path index: 1
      Hash: c2f5392dbc7e8ff6138d01608445240a
      Size: 24
+    Mtime: normalized
+    Ctime: normalized
    2:
      Path index: 2
      Hash: e6b009695d072974f2c4d1dd7e7ed4fc
      Size: 95
+    Mtime: normalized
+    Ctime: normalized
  Results (1):
    0:
      File hash indexes: 0 1 2
diff --git a/test/test_conf.c b/test/test_conf.c

index 048264fc91dd2ca1eb83c8e429e4771e36b52d4f..b06e477ac74109c29b4e1a87e50311866d919f0b 100644 (file)
--- a/test/test_conf.c
+++ b/test/test_conf.c
@@ -144,7 +144,8 @@ TEST(conf_read_valid_config)
         CHECK(conf->recache);
         CHECK(conf->run_second_cpp);
         CHECK_INT_EQ(SLOPPY_INCLUDE_FILE_MTIME|SLOPPY_INCLUDE_FILE_CTIME|
-                    SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS,
+                    SLOPPY_FILE_MACRO|SLOPPY_TIME_MACROS|
+                    SLOPPY_FILE_STAT_MATCHES,
                      conf->sloppiness);
         CHECK(!conf->stats);
         CHECK_STR_EQ_FREE1(format("%s_foo", user), conf->temporary_dir);
@@ -362,6 +363,8 @@ TEST(conf_print_items)
                 true,
                 SLOPPY_FILE_MACRO|SLOPPY_INCLUDE_FILE_MTIME|
                   SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS,
+                 SLOPPY_INCLUDE_FILE_CTIME|SLOPPY_TIME_MACROS|
+                 SLOPPY_FILE_STAT_MATCHES,
                 false,
                 "td",
                 022,
author	Justin Lebar <justin.lebar@gmail.com>
	Tue, 25 Dec 2012 04:09:38 +0000 (23:09 -0500)
committer	Joel Rosdahl <joel@rosdahl.net>
	Sat, 2 Mar 2013 19:45:58 +0000 (20:45 +0100)
MANUAL.txt		patch \| blob \| blame \| history
ccache.h		patch \| blob \| blame \| history
conf.c		patch \| blob \| blame \| history
manifest.c		patch \| blob \| blame \| history
test.sh		patch \| blob \| blame \| history
test/test_conf.c		patch \| blob \| blame \| history